From 384b45bdef025fbd4c139b20988f6cd5b4236e24 Mon Sep 17 00:00:00 2001 From: Else Someone Date: Fri, 27 Feb 2026 18:26:41 +0200 Subject: [PATCH] pkgs.uvms: init ...with some basic optional persistence and without having to rebuild images for every app nix run -f . pkgs.uvms -- --persist-home librewolf alacritty --run librewolf --run alacritty --- examples/dummy.nix | 107 +------- pkgs/baseImage.nix | 3 + pkgs/cloud-hypervisor-gpu.nix | 59 ++++ pkgs/default.nix | 12 +- pkgs/linux-uvm.nix | 153 ++++++----- pkgs/mkSystemdDropin.nix | 39 +++ pkgs/uvms-guest/guest.py | 76 +++++ pkgs/uvms-guest/package.nix | 5 + pkgs/uvms/package.nix | 11 +- pkgs/uvms/uvms.py | 503 +++++++++++++++++++++++----------- profiles/baseImage.nix | 407 +++++++++++++++++++++++++++ profiles/ch-runner.nix | 116 +------- profiles/debug-closure.nix | 8 +- profiles/minimal.nix | 36 +++ profiles/on-failure.nix | 72 +++++ 15 files changed, 1155 insertions(+), 452 deletions(-) create mode 100644 pkgs/baseImage.nix create mode 100644 pkgs/cloud-hypervisor-gpu.nix create mode 100644 pkgs/mkSystemdDropin.nix create mode 100644 pkgs/uvms-guest/guest.py create mode 100644 pkgs/uvms-guest/package.nix create mode 100644 profiles/baseImage.nix create mode 100644 profiles/minimal.nix create mode 100644 profiles/on-failure.nix diff --git a/examples/dummy.nix b/examples/dummy.nix index 02254c9..3e5ffd9 100644 --- a/examples/dummy.nix +++ b/examples/dummy.nix @@ -7,29 +7,11 @@ }: let uvmsPkgs = pkgs.callPackage ../pkgs { }; - waylandSock = "/run/user/1000/wayland-1"; - env = { - XDG_RUNTIME_DIR = "/run/user/1000"; - WAYLAND_DISPLAY = "wayland-1"; - - MESA_LOADER_DRIVER_OVERRIDE = "zink"; - - # WAYLAND_DEBUG = "1"; - # WAYLAND_DEBUG_PROXY = "1"; - - ELECTRON_OZONE_PLATFORM_HINT = "wayland"; - MOZ_ENABLE_WAYLAND = "1"; - QT_QPA_PLATFORM = "wayland"; # Qt Applications - GDK_BACKEND = "wayland"; # GTK Applications - XDG_SESSION_TYPE = "wayland"; # Electron Applications - SDL_VIDEODRIVER = "wayland"; - CLUTTER_BACKEND = 
"wayland"; - NIXOS_OZONE_WL = "1"; - }; in { imports = [ - ../profiles/all.nix + ../profiles/ch-runner.nix + ../profiles/baseImage.nix (modulesPath + "/profiles/minimal.nix") ]; @@ -41,103 +23,23 @@ in _module.args.inputs = import ../npins; # boot.isContainer = true; - # boot.initrd.enable = true; boot.loader.grub.enable = false; boot.initrd.systemd.enable = true; - services.logrotate.enable = false; - services.udisks2.enable = false; - system.tools.nixos-generate-config.enable = false; - # system.activationScripts.specialfs = lib.mkForce ""; - systemd.coredump.enable = false; - # networking.firewall.enable = false; - powerManagement.enable = false; - boot.kexec.enable = false; - # console.enable = false; - # system.switch.enable = false; - # services.udev.packages = lib.mkDefault [ ]; - services.resolved.enable = false; - systemd.services.generate-shutdown-ramfs.enable = lib.mkForce false; - systemd.services.systemd-remount-fs.enable = lib.mkForce false; - systemd.services.systemd-pstore.enable = lib.mkForce false; - systemd.services.lastlog2-import.enable = lib.mkForce false; - systemd.services.suid-sgid-wrappers.enable = lib.mkForce false; - fileSystems."/" = lib.mkDefault { - device = "rootfs"; # how does this work? does this assign a label to the tmpfs? 
- fsType = "tmpfs"; - options = [ "size=20%,mode=0755" ]; - neededForBoot = true; - }; boot.initrd.systemd.settings.Manager.DefaultTimeoutStartSec = 5; systemd.settings.Manager.DefaultTimeoutStopSec = 10; - networking.useNetworkd = true; - networking.nftables.enable = true; uvms.cloud-hypervisor.enable = true; - systemd.sysusers.enable = false; - services.userborn.enable = true; # nikstur it - users.mutableUsers = false; - users.groups.user = { }; - users.users.user = { - isNormalUser = true; - password = "hacktheplanet!"; - extraGroups = [ - "video" - "render" - ]; - }; - users.users.root.password = "hacktheplanet!"; - systemd.services."suid-sgid-wrappers".serviceConfig = { StandardOutput = "journal+console"; StandardError = "journal+console"; }; - environment.variables = env; - systemd.globalEnvironment = env; - systemd.tmpfiles.settings."10-xdg" = { - ${env.XDG_RUNTIME_DIR}.d = { - user = "user"; - group = "user"; - mode = "0755"; - }; - }; - systemd.sockets."wayland-proxy" = { - listenStreams = [ - waylandSock - ]; - socketConfig = { - SocketUser = "user"; - SocketGroup = "user"; - FileDescriptorName = "wayland"; - }; - wantedBy = [ "sockets.target" ]; - partOf = [ "wayland-proxy.service" ]; - }; - systemd.services."wayland-proxy" = { - wantedBy = [ "default.target" ]; - serviceConfig = { - User = "user"; - Group = "user"; - ExecStart = "${lib.getExe pkgs.wayland-proxy-virtwl} --virtio-gpu"; - # ExecStart = "${lib.getExe uvmsPkgs.wl-cross-domain-proxy} --listen-fd --filter-global wp_presentation"; - ExecStartPre = [ - "+/run/current-system/sw/bin/chmod 0666 /dev/dri/card0 /dev/dri/renderD128" - ]; - StandardOutput = "journal+console"; - StandardError = "journal+console"; - Restart = "on-failure"; - RestartSec = 5; - }; - }; - fonts.enableDefaultPackages = true; - systemd.services."terminal" = { wantedBy = [ "multi-user.target" ]; wants = [ "wayland-proxy.service" ]; after = [ "wayland-proxy.service" ]; - environment = env; serviceConfig = { User = "user"; 
WorkingDirectory = "/home/user"; @@ -146,11 +48,6 @@ in StandardError = "journal+console"; }; }; - boot.kernelModules = [ - "drm" - "virtio_gpu" - ]; - hardware.graphics.enable = true; # TODO: cmdline, kernel, initrd, fileSystems } diff --git a/pkgs/baseImage.nix b/pkgs/baseImage.nix new file mode 100644 index 0000000..3e67ba9 --- /dev/null +++ b/pkgs/baseImage.nix @@ -0,0 +1,3 @@ +{ nixos }: + +nixos ../profiles/baseImage.nix diff --git a/pkgs/cloud-hypervisor-gpu.nix b/pkgs/cloud-hypervisor-gpu.nix new file mode 100644 index 0000000..325b372 --- /dev/null +++ b/pkgs/cloud-hypervisor-gpu.nix @@ -0,0 +1,59 @@ +{ + lib, + cloud-hypervisor, + fetchFromGitHub, + rustPlatform, + enableDebug ? true, +}: + +let + spectrum = builtins.fetchTree { + url = "https://spectrum-os.org/git/spectrum"; + type = "git"; + rev = "0f3388f0191d9a03c7bf471c269a34a79f22018b"; + }; +in +cloud-hypervisor.overrideAttrs ( + finalAttrs: oldAttrs: + { + # Verbatim from spectrum + postUnpack = oldAttrs.postUnpack or "" + '' + unpackFile $vhost + chmod -R +w vhost + ''; + vhost = fetchFromGitHub { + name = "vhost"; + owner = "rust-vmm"; + repo = "vhost"; + rev = "vhost-user-backend-v0.20.0"; + hash = "sha256-KK1+mwYQr7YkyGT9+51v7TJael9D0lle2JXfRoTqYq8="; + }; + + patches = oldAttrs.patches or [ ] ++ [ + "${spectrum}/pkgs/cloud-hypervisor/0001-build-use-local-vhost.patch" + "${spectrum}/pkgs/cloud-hypervisor/0002-virtio-devices-add-a-GPU-device.patch" + ]; + vhostPatches = builtins.concatMap ( + name: + lib.optionals (lib.hasSuffix ".patch" name) [ "${spectrum}/pkgs/cloud-hypervisor/vhost/${name}" ] + ) (builtins.attrNames (builtins.readDir "${spectrum}/pkgs/cloud-hypervisor/vhost")); + # Verbatim copy from spectrum + postPatch = oldAttrs.postPatch or "" + '' + pushd ../vhost + for patch in $vhostPatches; do + echo applying patch $patch + patch -p1 < $patch + done + popd + ''; + cargoDeps = rustPlatform.fetchCargoVendor { + inherit (finalAttrs) patches; + inherit (oldAttrs) src; + hash = 
"sha256-wGtsyKDg1z1QK9mJ1Q43NSjoPbm3m81p++DoD8ipIUI="; + }; + } + // lib.optionalAttrs enableDebug { + buildType = "debug"; + dontStrip = true; + } +) diff --git a/pkgs/default.nix b/pkgs/default.nix index 6c710fa..ff3fa2e 100644 --- a/pkgs/default.nix +++ b/pkgs/default.nix @@ -4,6 +4,14 @@ let in lib.makeScope newScope ( self: + let + callPackage = + fun: overrides: + let + result = self.callPackage fun overrides; + in + result // { override = result.__originalOverride or result.override; }; + in dirToAttrs ./. [ ( @@ -14,9 +22,9 @@ lib.makeScope newScope ( ( name: fpath: typ: if typ == "regular" then - self.callPackage fpath { } + callPackage fpath { } else if typ == "directory" && builtins.pathExists (fpath + "/package.nix") then - self.callPackage (fpath + "/package.nix") { } + callPackage (fpath + "/package.nix") { } else null ) diff --git a/pkgs/linux-uvm.nix b/pkgs/linux-uvm.nix index 02baab5..bd11aef 100644 --- a/pkgs/linux-uvm.nix +++ b/pkgs/linux-uvm.nix @@ -6,81 +6,86 @@ let inherit (lib.kernel) yes no unset; inherit (lib) mkForce; -in -linux_latest.override { - structuredExtraConfig = { - BASE_SMALL = yes; - DRM_VIRTIO_GPU = yes; - EROFS_FS = yes; - # TSI = yes; - DAX = yes; - FS_DAX = yes; - FUSE_DAX = yes; - OVERLAY_FS = yes; - VIRTIO_BALLOON = yes; - VIRTIO_BLK = yes; - VIRTIO_CONSOLE = yes; - VIRTIO_PCI = yes; - VIRTIO_MMIO = yes; - VIRTIO = yes; - VSOCKETS = yes; - NO_HZ_IDLE = mkForce yes; - NO_HZ_FULL = mkForce unset; - HZ_1000 = unset; - HZ_250 = yes; # NixOS default: 1000 + result = linux_latest.override { + structuredExtraConfig = { + BASE_SMALL = yes; + DRM_VIRTIO_GPU = yes; + EROFS_FS = yes; + # TSI = yes; + DAX = yes; + FS_DAX = yes; + FUSE_DAX = yes; + OVERLAY_FS = yes; + VIRTIO_BALLOON = yes; + VIRTIO_BLK = yes; + VIRTIO_CONSOLE = yes; + VIRTIO_FS = yes; + VIRTIO_MMIO = yes; + VIRTIO_PCI = yes; + VIRTIO = yes; + FUSE_FS = yes; + VSOCKETS = yes; + NO_HZ_IDLE = mkForce yes; + NO_HZ_FULL = mkForce unset; + HZ_1000 = unset; + HZ_250 = yes; # 
NixOS default: 1000 - EXT4_FS = yes; - # EXT4_USE_FOR_EXT2 = yes; - XFS_FS = yes; - DEFAULT_SECURITY_APPARMOR = mkForce unset; + # LSM = "lockdown,yama,loadpin,safesetid,integrity,bpf"; - XEN = mkForce unset; - XEN_BACKEND = mkForce unset; - XEN_BALLOON = mkForce unset; - XEN_BALLOON_MEMORY_HOTPLUG = mkForce unset; - XEN_DOM0 = mkForce unset; - XEN_HAVE_PVMMU = mkForce unset; - XEN_MCE_LOG = mkForce unset; - XEN_PVH = mkForce unset; - XEN_SAVE_RESTORE = mkForce unset; - XEN_SYS_HYPERVISOR = mkForce unset; - PCI_XEN = mkForce unset; - POWER_RESET_GPIO = mkForce unset; - POWER_RESET_GPIO_RESTART = mkForce unset; - RCU_LAZY = mkForce unset; - REISERFS_FS_POSIX_ACL = mkForce unset; - REISERFS_FS_SECURITY = mkForce unset; - REISERFS_FS_XATTR = mkForce unset; - SWIOTLB_XEN = mkForce unset; - SUSPEND = mkForce unset; - PM = mkForce unset; - HIBERNATION = mkForce unset; - ACPI = mkForce unset; - CPU_FREQ = mkForce unset; - CPU_FREQ_DT = mkForce unset; - INTEL_IDLE = mkForce unset; - ISA_DMA_API = mkForce unset; - IA32_EMULATION = mkForce unset; - COMPAT = mkForce unset; - COMPAT_32 = mkForce unset; - KVM = mkForce unset; - BLOCK_LEGACY_AUTOLOAD = mkForce unset; - SWAP = mkForce unset; - CMA = mkForce unset; - FB = mkForce unset; - FB_EFI = mkForce unset; - FB_VESA = mkForce unset; - SECURITY_APPARMOR = mkForce unset; + EXT4_FS = yes; + # EXT4_USE_FOR_EXT2 = yes; + XFS_FS = yes; + DEFAULT_SECURITY_APPARMOR = mkForce unset; - VT = no; - DRM_FBDEV_EMULATION = lib.mkForce no; - FONTS = mkForce unset; - FONT_8x8 = mkForce unset; - FONT_TER16x32 = mkForce unset; - FRAMEBUFFER_CONSOLE = mkForce unset; - FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER = mkForce unset; - FRAMEBUFFER_CONSOLE_DETECT_PRIMARY = mkForce unset; - FRAMEBUFFER_CONSOLE_ROTATION = mkForce unset; - RC_CORE = mkForce unset; + XEN = mkForce unset; + XEN_BACKEND = mkForce unset; + XEN_BALLOON = mkForce unset; + XEN_BALLOON_MEMORY_HOTPLUG = mkForce unset; + XEN_DOM0 = mkForce unset; + XEN_HAVE_PVMMU = mkForce unset; + 
XEN_MCE_LOG = mkForce unset; + XEN_PVH = mkForce unset; + XEN_SAVE_RESTORE = mkForce unset; + XEN_SYS_HYPERVISOR = mkForce unset; + PCI_XEN = mkForce unset; + POWER_RESET_GPIO = mkForce unset; + POWER_RESET_GPIO_RESTART = mkForce unset; + RCU_LAZY = mkForce unset; + REISERFS_FS_POSIX_ACL = mkForce unset; + REISERFS_FS_SECURITY = mkForce unset; + REISERFS_FS_XATTR = mkForce unset; + SWIOTLB_XEN = mkForce unset; + SUSPEND = mkForce unset; + PM = mkForce unset; + HIBERNATION = mkForce unset; + ACPI = mkForce unset; + CPU_FREQ = mkForce unset; + CPU_FREQ_DT = mkForce unset; + INTEL_IDLE = mkForce unset; + ISA_DMA_API = mkForce unset; + IA32_EMULATION = mkForce unset; + COMPAT = mkForce unset; + COMPAT_32 = mkForce unset; + KVM = mkForce unset; + BLOCK_LEGACY_AUTOLOAD = mkForce unset; + SWAP = mkForce unset; + CMA = mkForce unset; + FB = mkForce unset; + FB_EFI = mkForce unset; + FB_VESA = mkForce unset; + SECURITY_APPARMOR = mkForce unset; + + VT = no; + DRM_FBDEV_EMULATION = lib.mkForce no; + FONTS = mkForce unset; + FONT_8x8 = mkForce unset; + FONT_TER16x32 = mkForce unset; + FRAMEBUFFER_CONSOLE = mkForce unset; + FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER = mkForce unset; + FRAMEBUFFER_CONSOLE_DETECT_PRIMARY = mkForce unset; + FRAMEBUFFER_CONSOLE_ROTATION = mkForce unset; + RC_CORE = mkForce unset; + }; }; -} +in +result // { __originalOverride = result.override; } diff --git a/pkgs/mkSystemdDropin.nix b/pkgs/mkSystemdDropin.nix new file mode 100644 index 0000000..56407bc --- /dev/null +++ b/pkgs/mkSystemdDropin.nix @@ -0,0 +1,39 @@ +{ + lib, + runCommand, + writeShellScriptBin, +}: +{ + name, + prefix ? "10-all-", + dirs ? [ + "service" + "mount" + "socket" + "timer" + "target" + ], + + dropinText ? null, + extraCommands ? "", + ... 
+}@args: + +runCommand "${name}-dropin" + ( + lib.removeAttrs args [ + "name" + ] + // { + inherit dirs dropinText extraCommands; + } + ) + '' + set -euo pipefail + root=$out/lib/systemd/system + for dir in $dirs ; do + mkdir -p "$root/$dir".d + printf "%s" "$dropinText" > "$root/$dir.d/${prefix}${name}.conf" + done + runHook extraCommands + '' diff --git a/pkgs/uvms-guest/guest.py b/pkgs/uvms-guest/guest.py new file mode 100644 index 0000000..87b3d1a --- /dev/null +++ b/pkgs/uvms-guest/guest.py @@ -0,0 +1,76 @@ +import json +import os +import select +import socket +import subprocess + + +def handle_run(run: dict) -> tuple: + res = {} + text = run.get("text", False) + env = { + **os.environ, + "PATH": ":".join( + os.environ.get("PATH", "").split(":") + run.get("EXTRA_PATH", []) + ), + } + proc = None + try: + proc = subprocess.Popen( + run["argv"], + text=text, + env=env, + cwd="/home/user", + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + res["status"] = "exec succeeded" + except Exception as e: + res["status"] = "exec failed" + res["exception"] = repr(e) + res["pid"] = getattr(proc, "pid", None) + try: + if proc is not None: + proc.wait(0.125) + res["long_running"] = False + res["returncode"] = getattr(proc, "returncode", None) + except subprocess.TimeoutExpired: + res["long_running"] = True + return res, proc + + +if __name__ == "__main__": + serv = socket.fromfd(3, socket.AF_VSOCK, socket.SOCK_STREAM) + + procs = [] + conns = [serv] + + while True: + rr, rw, xs = select.select(conns, [], []) + + for con in rr: + if con is serv: + con, (cid, port) = serv.accept() + assert cid == 2, cid + conns.append(con) + continue + req = con.recv(8192) + if req == b"": + conns.remove(con) + continue + try: + req = json.loads(req) + print(f"Received {req=}") + except json.JSONDecodeError as e: + print(f"Couldn't interpret {req=}: {e}") + continue + if "run" in req: + res, proc = handle_run(req["run"]) + procs.append(proc) + else: + res = 
{"status": "unknown command"} + _, rw, _ = select.select([], [con], []) + assert rw, rw + res = json.dumps(res).encode("utf8") + print(f"Responding with {res=}") + con.send(res) diff --git a/pkgs/uvms-guest/package.nix b/pkgs/uvms-guest/package.nix new file mode 100644 index 0000000..66cfa2d --- /dev/null +++ b/pkgs/uvms-guest/package.nix @@ -0,0 +1,5 @@ +{ + lib, + writers, +}: +writers.writePython3Bin "uvms-guest" { } ./guest.py diff --git a/pkgs/uvms/package.nix b/pkgs/uvms/package.nix index d5e84c3..109235e 100644 --- a/pkgs/uvms/package.nix +++ b/pkgs/uvms/package.nix @@ -11,9 +11,11 @@ execline, s6, strace, - taps, util-linux, virtiofsd, + + taps, + baseImage, }: let @@ -43,5 +45,12 @@ writers.writePython3Bin "uvms" { } ( STRACE = lib.getExe strace; TAPS = "${lib.getExe taps}"; VIRTIOFSD = "${lib.getExe virtiofsd}"; + + BASE_CONFIG = baseImage.config.system.build.ch; + SYSTEM = baseImage.config.system.build.toplevel; + SYSTEM_CLOSURE = writeClosure [ + baseImage.config.system.build.toplevel + baseImage.config.system.build.ch + ]; } ) diff --git a/pkgs/uvms/uvms.py b/pkgs/uvms/uvms.py index ffdc28b..ab71c6e 100644 --- a/pkgs/uvms/uvms.py +++ b/pkgs/uvms/uvms.py @@ -9,14 +9,18 @@ import os import subprocess import socket +import json from argparse import ArgumentParser from contextlib import contextmanager, closing, ExitStack parser = ArgumentParser("supervise-vm") -parser.add_argument("--vm") +parser.add_argument("--vm", default=None) parser.add_argument("--prefix", default="$HOME/uvms/$VM") -parser.add_argument("--vm-config") +parser.add_argument("--vm-config", default="@BASE_CONFIG@") # noqa: E501 +parser.add_argument("--persist-home", action="store_true") +parser.add_argument("--run", action="append") +parser.add_argument("app", nargs="*", default=()) TOOLS_DIR = "@TOOLS@" # noqa: E501 SOCKETBINDER = TOOLS_DIR + "/s6-ipcserver-socketbinder" # noqa: E501 @@ -27,12 +31,18 @@ VIRTIOFSD = "@VIRTIOFSD@" # noqa: E501 BWRAP = "@BWRAP@" # noqa: E501 with 
open("@TOOLS_CLOSURE@", mode="r") as f: # noqa: E501 - CLOSURE = [ + TOOLS_CLOSURE = [ *(ln.rstrip() for ln in f.readlines()), os.path.dirname(__file__), ] -PASSTHRU_PATH = ":".join([TOOLS_DIR]) +BASE_SYSTEM = "@SYSTEM@" # noqa: E501 +with open("@SYSTEM_CLOSURE@", mode="r") as f: # noqa: E501 + BASE_SYSTEM_CLOSURE = [ + *(ln.rstrip() for ln in f.readlines()), + ] + +PASSTHRU_PATH = ":".join([TOOLS_DIR, *os.environ.get("PATH", "").split(":")]) PASSTHRU_ENV = { **{ k: v @@ -41,6 +51,7 @@ PASSTHRU_ENV = { or k.startswith("WAYLAND") or k.startswith("XDG_") or k.startswith("DBUS_") + or k.startswith("NIX_") or k in [ "TAPS_SOCK", @@ -52,6 +63,10 @@ PASSTHRU_ENV = { def preprocess_args(args_mut): + if not args_mut.app and args_mut.run: + args_mut.app = [*args_mut.run] + if not args_mut.vm: + args_mut.vm = args_mut.run[0] keys = [k for k, v in args_mut._get_kwargs() if isinstance(v, str)] for k in keys: v = getattr(args_mut, k) @@ -86,6 +101,7 @@ class Processes: self.vm = vm self.check = check self.defaults = defaults + self.processes = [] def make_env(self): return { @@ -121,6 +137,7 @@ class Processes: kwargs["pass_fds"] = kwargs.get("pass_fds", ()) kwargs["env"] = kwargs.get("env", self.make_env()) kwargs["cwd"] = kwargs.get("cwd", self.prefix) + kwargs["text"] = kwargs.get("text", True) kwargs["stdin"] = kwargs.get("stdin", subprocess.DEVNULL) kwargs["stdout"] = kwargs.get("stdout", subprocess.DEVNULL) kwargs["stderr"] = kwargs.get("stderr", subprocess.DEVNULL) @@ -132,12 +149,19 @@ class Processes: ) if not alive_after(proc, 0.125): raise RuntimeError("Failed to start", args) + print(f"Started {args}") + self.processes.append(proc) yield proc + print(f"Releasing {args}") finally: - if alive_after(proc, 0.125): - proc.terminate() - if proc is not None: - proc.wait() + if subprocess.PIPE in (kwargs["stderr"], kwargs["stdout"]): + print(proc.communicate()) + while alive_after(proc, 0.125): + try: + proc.terminate() + proc.wait() + except Exception as e: + 
print(f"Cleanup failing: {e}") @contextmanager def bwrap( @@ -147,6 +171,8 @@ class Processes: # Based on the args from # `host/rootfs/image/usr/bin/run-vmm` unshare_all=True, + uid=1000, + gid=100, unshare_user=True, unshare_ipc=None, unshare_pid=None, @@ -164,7 +190,7 @@ class Processes: "/proc/sys", "/dev/null", "/proc/kallsyms", - *CLOSURE, + *sorted(set([*TOOLS_CLOSURE, *BASE_SYSTEM_CLOSURE])), ), ro_bind=(), remount_ro=("/proc/fs", "/proc/irq"), @@ -183,123 +209,128 @@ class Processes: bwrap_args_sock, remote = socket.socketpair() remote.set_inheritable(True) bwrap_args_f = bwrap_args_sock.makefile("w") - with ExitStack() as cleanup: - # cleanup.enter_context(closing(bwrap_args_sock)) - # cleanup.enter_context(closing(bwrap_args_f)) - def print_arg(*args): - print(*args, file=bwrap_args_f, sep="\0", end="\0") + def print_arg(*args): + print(*args, file=bwrap_args_f, sep="\0", end="\0") - if unshare_all: - print_arg("--unshare-all") - if unshare_user: - print_arg("--unshare-user") - if unshare_ipc: - print_arg("--unshare-ipc") - if unshare_pid: - print_arg("--unshare-pid") - if unshare_net: - print_arg("--unshare-net") - elif unshare_net is False: - print_arg("--share-net") - if unshare_uts: - print_arg("--unshare-uts") - if unshare_cgroup_try: - print_arg("--unshare-cgroup-try") - if die_with_parent: - print_arg("--die-with-parent") - if dev: - print_arg("--dev", dev) - if proc: - print_arg("--proc", proc) + if unshare_all: + print_arg("--unshare-all") + if unshare_user: + print_arg("--unshare-user") + if uid is not None: + assert unshare_user + print_arg("--uid", uid) + if gid is not None: + assert unshare_user + print_arg("--gid", gid) + if unshare_ipc: + print_arg("--unshare-ipc") + if unshare_pid: + print_arg("--unshare-pid") + if unshare_net: + print_arg("--unshare-net") + elif unshare_net is False: + print_arg("--share-net") + if unshare_uts: + print_arg("--unshare-uts") + if unshare_cgroup_try: + print_arg("--unshare-cgroup-try") + if die_with_parent: 
+ print_arg("--die-with-parent") + if dev: + print_arg("--dev", dev) + if proc: + print_arg("--proc", proc) - for p in bind: - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--bind", p1, p2) - for p in (*ro_bind, *ro_bind_implicit): - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--ro-bind", p1, p2) - for p in (*dev_bind, *dev_bind_implicit): - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--dev-bind", p1, p2) - for p in (*tmpfs, *tmpfs_implicit): - print_arg("--tmpfs", p) - # Hunch: order might matter... - for p in remount_ro: - print_arg("--remount-ro", p) + for p in bind: + assert isinstance(p, (str, tuple)), p + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--bind", p1, p2) + for p in (*ro_bind, *ro_bind_implicit): + assert isinstance(p, (str, tuple)), p + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--ro-bind", p1, p2) + for p in (*dev_bind, *dev_bind_implicit): + assert isinstance(p, (str, tuple)), p + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--dev-bind", p1, p2) + for p in (*tmpfs, *tmpfs_implicit): + print_arg("--tmpfs", p) + # Hunch: order might matter... 
+ for p in remount_ro: + print_arg("--remount-ro", p) - bwrap_args_f.flush() + bwrap_args_f.flush() - with ExitStack() as es: - es.enter_context(closing(remote)) - es.enter_context(closing(bwrap_args_sock)) - es.enter_context(closing(bwrap_args_f)) - proc = cleanup.enter_context( - self.popen( - "bwrap", - "--args", - str(remote.fileno()), - *bwrap_args, - **popen_kwargs, - executable=BWRAP, - pass_fds=(*pass_fds, remote.fileno()), + try: + with ExitStack() as proc_es: + with ExitStack() as es: + es.enter_context(closing(remote)) + es.enter_context(closing(bwrap_args_sock)) + es.enter_context(closing(bwrap_args_f)) + proc = proc_es.enter_context( + self.popen( + "bwrap", + "--args", + str(remote.fileno()), + *bwrap_args, + **popen_kwargs, + executable=BWRAP, + pass_fds=(*pass_fds, remote.fileno()), + ) ) - ) - yield proc + yield proc + finally: + assert proc.returncode is not None, proc @contextmanager def run_ch(self): - try: - # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0) - # s.set_inheritable(True) - # s.setblocking(True) - # s.bind(self.prefix + "/vmm.sock") - args = [ - SOCKETBINDER, - "-B", - self.prefix + "/vmm.sock", - # "@STRACE@", # noqa: E501 - # "-Z", - # "-ff", - CH, - "--api-socket", - "fd=0", - # f"fd={s.fileno()}" - ] - needs_cleanup = False - with self.bwrap( - *args, - bind=[self.prefix], - # Probably just need the path to vmlinux - ro_bind=["/nix/store"], # I give up - unshare_net=False, - shell=False, - stderr=None, - # pass_fds=(s.fileno(),) - ) as proc: - # s.close() - assert alive_after(proc, 0.125) - if not os.path.exists(self.prefix + "/vmm.sock"): - raise RuntimeError( - f"{self.prefix}/vmm.sock should exist by now", - ) - needs_cleanup = True - if proc.returncode is not None: - raise RuntimeError("CH exited early") - yield proc - finally: - unlink_paths = ( - [ - self.prefix + "/vmm.sock", - self.prefix + "/vmm.sock.lock", - self.prefix + "/vsock.sock", - ] - if needs_cleanup - else [] + # s = socket.socket(socket.AF_UNIX, 
socket.SOCK_STREAM, 0) + # s.set_inheritable(True) + # s.setblocking(True) + # s.bind(self.prefix + "/vmm.sock") + args = [ + SOCKETBINDER, + "-B", + self.prefix + "/vmm.sock", + # "@STRACE@", # noqa: E501 + # "-Z", + # "-ff", + CH, + "--api-socket", + "fd=0", + # f"fd={s.fileno()}" + ] + cleanup_paths = [ + self.prefix + "/vmm.sock", + self.prefix + "/vmm.sock.lock", + self.prefix + "/vsock.sock", + ] + new_paths = [p for p in cleanup_paths if not os.path.exists(p)] + old_paths = [p for p in cleanup_paths if p not in new_paths] + with ExitStack() as cleanup: + cleanup.enter_context(removing(*new_paths)) + proc = cleanup.enter_context( + self.bwrap( + *args, + bind=[self.prefix], + # Probably just need the path to vmlinux + # ro_bind=["/nix/store"], # I give up + unshare_net=False, + shell=False, + # pass_fds=(s.fileno(),) + ) ) - for p in unlink_paths: - if os.path.exists(p): - os.remove(p) + # s.close() + cleanup.enter_context(removing(*old_paths)) + assert alive_after(proc, 1.0), proc + if not os.path.exists(self.prefix + "/vmm.sock"): + raise RuntimeError( + f"{self.prefix}/vmm.sock should exist by now", + ) + if proc.returncode is not None: + raise RuntimeError("CH exited early") + yield proc @contextmanager def start_gpu( @@ -330,7 +361,7 @@ class Processes: with self.popen( *args, stderr=None, - ) as proc, removing(sock_path): + ) as proc, removing(sock_path, sock_path + ".lock"): yield proc, sock_path @contextmanager @@ -338,9 +369,9 @@ class Processes: self, root_dir, tag, - ro=False, + ro=True, subdirs=None, - extra_flags=("--posix-acl",), + extra_flags=("--posix-acl", "--xattr"), ): assert os.path.exists(root_dir) @@ -351,20 +382,16 @@ class Processes: # s.setblocking(True) # s.set_inheritable(True) - def rm_sock(): - if os.path.exists(sock_path): - os.remove(sock_path) - with ExitStack() as cleanup: # noqa: F841 # s.bind(sock_path.encode("utf8")) # cleanup.enter_context(closing(s)) - cleanup.enter_context(defer(rm_sock)) + 
cleanup.enter_context(removing(sock_path, sock_path + ".pid")) args = [ # If using bwrap(): # "--argv0", "virtiofsd", # "--uid", "1000", - # "--gid", "1000", + # "--gid", "100", # "--", "unshare", "-rUm", @@ -372,7 +399,7 @@ class Processes: "--map-user", "1000", "--map-group", - "1000", + "100", VIRTIOFSD, "--shared-dir", root_dir, @@ -396,6 +423,8 @@ class Processes: # if subdirs is not None # else [root_dir], # "pass_fds": (2, s.fileno()), + "stdout": subprocess.PIPE, + "stderr": subprocess.PIPE, } try: with self.popen(*args, **kwargs) as p: @@ -423,20 +452,43 @@ def removing(*paths): os.remove(p) -if __name__ == "__main__": - args, args_next = parser.parse_known_args() - preprocess_args(args) +def connect_ch_vsock( + vsock_sock_path, + port: int, + type=socket.SOCK_STREAM, + blocking=True, +) -> socket.socket: + s = socket.socket(socket.AF_UNIX, type, 0) + s.setblocking(blocking) + s.connect(vsock_sock_path) + s.send(b"CONNECT %d\n" % port) + return s + + +@contextmanager +def listen_ch_vsock( + vsock_sock_path, + port: int, + type=socket.SOCK_STREAM, + blocking=True, +) -> socket.socket: + listen_path = vsock_sock_path + "_%d" % port + s = socket.socket(socket.AF_UNIX, type, 0) + s.setblocking(blocking) + s.bind(listen_path) + s.listen() + try: + yield s + finally: + os.remove(listen_path) + + +def main(args, args_next, cleanup, ps): send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}" os.makedirs(send_dir, exist_ok=True) os.makedirs(args.prefix, exist_ok=True) - os.makedirs(args.prefix + "/pts", exist_ok=True) - - ps = Processes( - prefix=args.prefix, - vm=args.vm, - ) ch_remote = [ "ch-remote", @@ -444,31 +496,170 @@ if __name__ == "__main__": args.prefix + "/vmm.sock", ] - with ExitStack() as cleanup: + with open(args.vm_config) as f: + config = json.load(f) - vfsd, vfsd_path = cleanup.enter_context( + app_paths = [] + for a in args.app: + out_path = ps.exec( + "nix-build", + "", + "-A", + a, + "--no-out-link", + capture_output=True, + text=True, + 
).stdout.strip() + assert out_path.startswith("/nix/store/") + app_paths.append(out_path) + apps_closure = ps.exec( # noqa: F841 + "nix-store", + "-qR", + *app_paths, + capture_output=True, + text=True, + ).stdout.split() + + ready_sock = cleanup.enter_context( + listen_ch_vsock(ps.prefix + "/vsock.sock", 8888), + ) + + virtiofs_socks = [] + _, sock_path = cleanup.enter_context( + ps.start_virtiofsd( + send_dir, + tag="send", + ro=False, + ) + ) + virtiofs_socks.append(("send", sock_path)) + _, sock_path = cleanup.enter_context( + ps.start_virtiofsd( + "/nix/store", + subdirs=apps_closure, + tag="apps", + ) + ) + virtiofs_socks.append(("apps", sock_path)) + _, sock_path = cleanup.enter_context( + ps.start_virtiofsd( + "/nix/store", + subdirs=BASE_SYSTEM_CLOSURE, + tag="system", + ) + ) + virtiofs_socks.append(("system", sock_path)) + + if args.persist_home: + os.makedirs(args.prefix + "/home", exist_ok=True) + _, sock_path = cleanup.enter_context( ps.start_virtiofsd( - send_dir, - tag="send", + args.prefix + "/home", + subdirs=BASE_SYSTEM_CLOSURE, + tag="home", + ro=False, ) ) - gpud, gpud_path = cleanup.enter_context(ps.start_gpu()) + virtiofs_socks.append(("home", sock_path)) + config["payload"]["cmdline"] += " uvms.persist-home=1" - ch = cleanup.enter_context(ps.run_ch()) - ps.exec(*ch_remote, "create", args.vm_config) - ps.exec( - TAPS, - "pass", - *ch_remote, - "add-net", - "id=wan,fd=3,mac=00:00:00:00:00:01", - ) + gpud, gpud_path = cleanup.enter_context(ps.start_gpu()) - ps.exec(*ch_remote, "add-fs", f"tag=send,socket={vfsd_path},id=send") - ps.exec(*ch_remote, "add-gpu", f"socket={gpud_path}") - ps.exec(*ch_remote, "boot") - ps.exec(*ch_remote, "info") + ch = cleanup.enter_context(ps.run_ch()) + + ps.exec( + *ch_remote, + "create", + input=json.dumps(config), + text=True, + ) + ps.exec( + TAPS, + "pass", + *ch_remote, + "add-net", + "id=wan,fd=3,mac=00:00:00:00:00:01", + ) + + # TODO: add-fs apps closure separately + for tag, sock_path in virtiofs_socks: + 
ps.exec(*ch_remote, "add-fs", f"tag={tag},socket={sock_path},id={tag}") + ps.exec(*ch_remote, "add-gpu", f"socket={gpud_path}") + ps.exec(*ch_remote, "boot") + ps.exec(*ch_remote, "info") + + with ready_sock: + ready_sock.settimeout(16.0) try: - ch.wait() - except KeyboardInterrupt: - pass + con, _ = ready_sock.accept() + except OSError: + print( + "CH didn't try connecting to the readiness notification socket" + ) # noqa: E501 + else: + with con: + msg = con.recv(128) + assert msg.startswith(b"READY=1"), msg + + with connect_ch_vsock(ps.prefix + "/vsock.sock", 24601) as guest: + for r in args.run or (): + try: + guest.send( + json.dumps( + { + "run": { + "argv": [r], + "EXTRA_PATH": [ + f"{a}/bin" for a in app_paths + ], # noqa: E501 + } + } + ).encode("utf8") + ) + res = guest.recv(8192) + try: + res = json.loads(guest.recv(8192)) + except json.JSONDecodeError as e: + print(f"Couldn't interpret --run {r} response: {e} {res}") + continue + adverb = ( + "Successfully" + if res["status"] == "exec succeeded" + else "Failed to" # noqa: E501 + ) + print(f"{adverb} --run {r}: {res}") + except Exception as e: + print(f"Couldn't --run {r}: {repr(e)}") + try: + ch.wait() + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + args, args_next = parser.parse_known_args() + preprocess_args(args) + ps = Processes( + prefix=args.prefix, + vm=args.vm, + ) + + try: + with ExitStack() as cleanup: + main(args, args_next, cleanup, ps) + finally: + for p in ps.processes: + if p.returncode is not None: + continue + try: + print(f"Cleanup failed. Re-trying the killing of {p}") + p.terminate() + except: # noqa: E722 + pass + for p in ps.processes: + if p.returncode is not None: + continue + try: + p.wait() + except: # noqa: E722 + pass diff --git a/profiles/baseImage.nix b/profiles/baseImage.nix new file mode 100644 index 0000000..87f8df5 --- /dev/null +++ b/profiles/baseImage.nix @@ -0,0 +1,407 @@ +{ + lib, + config, + modulesPath, + pkgs, + ... 
+}: +let + inherit (lib) mkOption types concatStringsSep; + jsonType = (pkgs.formats.json { }).type; # used as freeformType of the uvms.ch.settings submodules declared further down + + inherit (config.system.build) initialRamdisk; + inherit (config.system.boot.loader) initrdFile; + inherit (config.boot.kernelPackages) kernel; + kernelTarget = pkgs.stdenv.hostPlatform.linux-kernel.target; + uvmsPkgs = pkgs.callPackage ../pkgs { }; + waylandSock = "/run/user/1000/wayland-1"; # listen address of wayland-proxy.socket; equals XDG_RUNTIME_DIR/WAYLAND_DISPLAY from env below + env = { # exported via both environment.variables and systemd.globalEnvironment + XDG_RUNTIME_DIR = "/run/user/1000"; + WAYLAND_DISPLAY = "wayland-1"; + + # MESA_LOADER_DRIVER_OVERRIDE = "zink"; + + ELECTRON_OZONE_PLATFORM_HINT = "wayland"; + MOZ_ENABLE_WAYLAND = "1"; + QT_QPA_PLATFORM = "wayland"; # Qt Applications + GDK_BACKEND = "wayland"; # GTK Applications + XDG_SESSION_TYPE = "wayland"; # Electron Applications + SDL_VIDEODRIVER = "wayland"; + CLUTTER_BACKEND = "wayland"; + NIXOS_OZONE_WL = "1"; + }; +in +{ + imports = [ + (modulesPath + "/profiles/minimal.nix") # ./minimal.nix imports this module as well + ./debug-closure.nix + ./minimal.nix + ./on-failure.nix + ]; + config = { + some.failure-handler.enable = true; # option declared in ./on-failure.nix + hardware.graphics.enable = true; + # boot.kernelPackages = pkgs.linuxPackagesFor uvmsPkgs.linux-uvm; + # boot.isContainer = true; + boot.initrd.kernelModules = [ + "drm" + "virtio_blk" + "virtiofs" + "virtio_gpu" + "virtio_mmio" + "virtio_pci" + "overlay" + ]; + boot.kernelModules = [ + "drm" + "erofs" + "overlay" + "virtio_blk" + "virtiofs" + "virtio_gpu" + "virtio_mmio" + "virtio_pci" + ]; + boot.initrd.systemd.initrdBin = [ + pkgs.fuse + pkgs.fuse3 + ]; + fileSystems = { + "/" = lib.mkDefault { + device = "rootfs"; # how does this work? does this assign a label to the tmpfs?
+ fsType = "tmpfs"; + options = [ "size=20%,mode=0755" ]; + neededForBoot = true; + }; + "/nix/store" = { + fsType = "overlay"; + overlay.lowerdir = [ # only lower layers are listed here; no upperdir is configured + "/nix/.ro-stores/system" + "/nix/.ro-stores/apps" + ]; + neededForBoot = true; + }; + "/nix/.ro-stores/system" = { + device = "system"; # virtiofs tag; uvms.py starts a virtiofsd with tag="system" + fsType = "virtiofs"; + options = [ + "defaults" + "ro" + "x-systemd.requires=systemd-modules-load.service" + ]; + neededForBoot = true; + }; + "/nix/.ro-stores/apps" = { + device = "apps"; # virtiofs tag; uvms.py starts a virtiofsd with tag="apps" + fsType = "virtiofs"; + options = [ + "defaults" + "ro" + "x-systemd.requires=systemd-modules-load.service" + ]; + neededForBoot = true; + }; + }; + + systemd.mounts = [ + { + type = "virtiofs"; + where = "/home/user"; + what = "home"; # virtiofs tag that uvms.py only adds when --persist-home is given + after = [ "systemd-modules-load.service" ]; + wantedBy = [ "local-fs.target" ]; + before = [ "local-fs.target" ]; + requires = [ "systemd-modules-load.service" ]; + options = lib.concatStringsSep "," [ + "defaults" + "rw" + "X-mount.owner=1000" + "X-mount.group=100" + ]; + unitConfig = { + ConditionKernelCommandLine = "uvms.persist-home=1"; # uvms.py appends this token to the cmdline together with the "home" share + }; + } + { + type = "virtiofs"; + where = "/home/user/send"; + what = "send"; # virtiofs tag of the writable send_dir share started by uvms.py + wants = [ + "home-user.mount" + "-.mount" + ]; + after = [ + "systemd-modules-load.service" + "home-user.mount" + "-.mount" + ]; + wantedBy = [ "local-fs.target" ]; + before = [ "local-fs.target" ]; + options = lib.concatStringsSep "," [ + "defaults" + "rw" + "X-mount.owner=1000" + "X-mount.group=100" + ]; + unitConfig = { + DefaultDependencies = false; + }; + } + ]; + # systemd.services."mount-home-user-send" = { + # wants = [ "home-user.mount" ]; + # after = [ + # "systemd-modules-load.service" + # "home-user.mount" + # "-.mount" + # ]; + # wantedBy = [ "local-fs.target" ]; + # before = [ "local-fs.target" ]; + # unitConfig = { + # DefaultDependencies = false; + # }; + # environment.PATH = lib.mkForce ( + # lib.makeBinPath [ + # pkgs.fuse + # pkgs.fuse3 + # pkgs.coreutils + # ] + # ); + # serviceConfig = { + # Type = "oneshot"; + #
RemainsAfterExit = true; + # ExecStart = [ + # "/run/current-system/sw/bin/mkdir -p /home/user/send" + # "/run/current-system/sw/bin/chown user /home/user/send" + # "/run/current-system/sw/sbin/mount -t virtiofs -o defaults,rw send /home/user/send" + # ]; + # StandardOutput = "journal+console"; + # StandardError = "journal+console"; + # }; + # }; + + systemd.network.enable = true; + networking.useNetworkd = true; + networking.nftables.enable = true; + networking.useDHCP = true; + networking.nameservers = [ "1.1.1.1" ]; + services.resolved.enable = lib.mkForce true; + + system.activationScripts.specialfs = lib.mkForce ""; + # networking.firewall.enable = false; + console.enable = false; + services.udev.packages = lib.mkDefault [ ]; + systemd.services."systemd-oomd".enable = false; + + users.mutableUsers = false; + users.users.root.password = "hacktheplanet!"; + users.groups.users = { }; + users.users.user = { + uid = 1000; + isNormalUser = true; + password = "hacktheplanet!"; + extraGroups = [ + "video" + "render" + "users" + "wheel" + ]; + }; + + environment.variables = env; + systemd.globalEnvironment = env; + + systemd.tmpfiles.settings."10-xdg" = { + ${env.XDG_RUNTIME_DIR}.d = { + user = "user"; + group = "users"; + mode = "0700"; # the XDG Base Directory spec requires 0700 for XDG_RUNTIME_DIR + }; + }; + + systemd.sockets."wayland-proxy" = { + listenStreams = [ + waylandSock + ]; + socketConfig = { + SocketUser = "user"; + SocketGroup = "users"; + FileDescriptorName = "wayland"; + }; + wantedBy = [ "sockets.target" ]; + partOf = [ "wayland-proxy.service" ]; + }; + systemd.services."wayland-proxy" = { + wantedBy = [ "default.target" ]; + serviceConfig = { + User = "user"; + Group = "users"; + ExecStart = "${lib.getExe pkgs.wayland-proxy-virtwl} --virtio-gpu"; + # ExecStart = "${lib.getExe uvmsPkgs.wl-cross-domain-proxy} --listen-fd --filter-global wp_presentation"; + ExecStartPre = [ # the leading "+" makes systemd run the command with full privileges despite User= + "+/run/current-system/sw/bin/chmod 0666 /dev/dri/card0 /dev/dri/renderD128" + ]; + StandardOutput = "journal+console"; + StandardError =
"journal+console"; + Restart = "on-failure"; + RestartSec = 5; + }; + }; + + systemd.sockets."uvms-guest" = { + wantedBy = [ "default.target" ]; + listenStreams = [ + "vsock::24601" + ]; + partOf = [ "uvms-guest.service" ]; + }; + systemd.services."uvms-guest" = { + serviceConfig = { + User = "user"; + Group = "users"; + ExecStart = "${lib.getExe uvmsPkgs.uvms-guest}"; + StandardOutput = "journal+console"; + StandardError = "journal+console"; + Restart = "on-failure"; + RestartSec = 5; + }; + }; + + fonts.enableDefaultPackages = true; + + boot.kernelParams = [ + "earlyprintk=ttyS0" + "console=ttyS0" + "reboot=t" + "panic=-1" + "io.systemd.credential:vmm.notify_socket=vsock-stream:2:8888" # uvms.py listens on vsock port 8888 for READY=1. NOTE(review): "io.systemd.credential:" looks like the SMBIOS string syntax; systemd documents systemd.set_credential=NAME:VALUE for the kernel command line — confirm this form is honoured + # "rootfstype=virtiofs" + # "root=rootstore" + ]; + }; + + options = { + system.build.ch = mkOption { + type = types.package; + default = (pkgs.formats.json { }).generate "vm.json" config.uvms.ch.settings; + }; + uvms.ch.settings = mkOption { + default = { }; + type = types.submodule { + freeformType = jsonType; + options = { + payload = { + cmdline = mkOption { + type = types.str; + default = concatStringsSep " " ( + config.boot.kernelParams + ++ [ + # "init=${lib.removePrefix "/nix/store" "${config.system.build.toplevel}"}/init" + "init=${config.system.build.toplevel}/init" + ] + ); + defaultText = lib.literalExpression ''concatStringsSep " " config.boot.kernelParams''; # literal text for the docs; interpolating the list itself cannot be coerced to a string + }; + kernel = mkOption { + type = types.str; + default = "${kernel}/${kernelTarget}"; + }; + initramfs = mkOption { + type = types.nullOr types.str; + default = "${initialRamdisk}/${initrdFile}"; + }; + }; + vsock = { + cid = mkOption { + type = types.int; + default = 4; + }; + socket = mkOption { + type = types.str; + default = "vsock.sock"; + }; + }; + "api-socket" = mkOption { + type = types.str; + default = "vmm.sock"; + }; + "serial".mode = mkOption { + type = types.str; + default = "File"; + }; + "serial".file = mkOption { + type = types.nullOr types.str; + default = "serial"; + }; + "console".mode = mkOption { + type =
types.str; + default = "Pty"; + }; + "console".file = mkOption { + type = types.nullOr types.str; + default = null; + }; + # "watchdog" = true; + # "seccomp" = true; + disks = mkOption { # list of disk entries; keys beyond path/readonly/id are accepted through freeformType + default = [ ]; + type = types.listOf ( + types.submodule { + freeformType = jsonType; + options = { + path = mkOption { + type = types.oneOf [ + types.path + types.str + ]; + }; + readonly = mkOption { + type = types.bool; + default = true; + }; + id = mkOption { type = types.str; }; + }; + } + ); + }; + memory = mkOption { + default = { }; + type = types.submodule { + freeformType = jsonType; + options = { + size = mkOption { + type = types.int; + default = 1536 * 1048576; # 1536 * 2^20, i.e. 1536 MiB if the unit is bytes — TODO confirm against cloud-hypervisor + }; + shared = mkOption { + type = types.bool; + default = true; + }; + mergeable = mkOption { + type = types.bool; + default = true; + }; + }; + }; + }; + cpus = mkOption { + default = { }; + type = types.submodule { + freeformType = jsonType; + options = { + boot_vcpus = mkOption { + type = types.int; + default = 4; + }; + max_vcpus = mkOption { + type = types.int; + default = 4; + }; + }; + }; + }; + }; + }; + }; + }; +} diff --git a/profiles/ch-runner.nix b/profiles/ch-runner.nix index ad3685f..ef32247 100644 --- a/profiles/ch-runner.nix +++ b/profiles/ch-runner.nix @@ -9,7 +9,7 @@ # but we shall begin by reproducing at least some of their work. let - cfg = config.uvms.cloud-hypervisor; + cfg = config.uvms.ch; inherit (config.networking) hostName; inherit (config.debug.closure.erofs) layers; @@ -48,69 +48,21 @@ let in { options = { - uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)"; - uvms.cloud-hypervisor.runner = mkOption { + uvms.ch.enable = lib.mkEnableOption "Configure guest (e.g.
fileSystems)"; + uvms.ch.runner = mkOption { type = types.package; description = "A naive script for running this system in cloud-hypervisor"; }; - uvms.cloud-hypervisor.debugger = mkOption { + uvms.ch.debugger = mkOption { type = types.lazyAttrsOf types.anything; description = "Same but you can debug the kernel"; }; - uvms.cloud-hypervisor.settingsFile = mkOption { + uvms.ch.settingsFile = mkOption { type = types.package; default = chSettingsFile; defaultText = "..."; readOnly = true; }; - uvms.cloud-hypervisor.settings = mkOption { - default = { }; - type = types.submodule { - freeformType = (pkgs.formats.json { }).type; - options = { - payload = { - cmdline = mkOption { type = types.str; }; - kernel = mkOption { type = types.str; }; - initramfs = mkOption { - type = types.str; - default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}"; - }; - }; - vsock = { - cid = mkOption { - type = types.int; - default = 4; - }; - socket = mkOption { - type = types.str; - default = "vsock.sock"; - }; - }; - "api-socket" = mkOption { - type = types.str; - default = "vmm.sock"; - }; - "serial".mode = mkOption { - type = types.str; - default = "File"; - }; - "serial".file = mkOption { - type = types.nullOr types.str; - default = "serial"; - }; - "console".mode = mkOption { - type = types.str; - default = "Pty"; - }; - "console".file = mkOption { - type = types.nullOr types.str; - default = null; - }; - # "watchdog" = true; - # "seccomp" = true; - }; - }; - }; uvms.cloud-hypervisor.extraCmdline = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; @@ -118,44 +70,24 @@ in uvms.cloud-hypervisor.cmdline = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ - "earlyprintk=ttyS0" - "console=ttyS0" - "reboot=t" - "panic=-1" - "init=${config.system.build.toplevel}/init" ] ++ config.boot.kernelParams ++ config.uvms.cloud-hypervisor.extraCmdline; }; }; + imports = [ ./baseImage.nix ]; config = lib.mkMerge [ { # 
boot.kernelPackages = pkgs.linuxPackagesFor (uvmsPkgs.linux-uvm); - uvms.cloud-hypervisor.settings = { - payload = { - cmdline = lib.concatStringsSep " " cfg.cmdline; - kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}"; - }; - disks = map (img: { - path = img; - readonly = true; - id = toString img.label; - }) layers; + uvms.ch.settings = { memory = { - size = 1536 * 1048576; - shared = true; - mergeable = true; # hotplugged_size = 512 * 1048576; # hotplugd_size = 1536 * 1048576; # hotplug_method = "virtio-mem" }; - cpus = { - boot_vcpus = 4; - max_vcpus = 4; - }; }; - uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest ( + uvms.ch.debugger = pkgs.testers.runNixOSTest ( { config, ... }: { name = "test-run-${hostName}"; @@ -265,39 +197,9 @@ in ); # NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum - uvms.cloud-hypervisor.runner = writeElb "run-${hostName}" '' + uvms.ch.runner = writeElb "run-${hostName}" '' ${lib.getExe uvmsPkgs.uvms} --vm-config=${chSettingsFile} --vm=${hostName} ''; } - (lib.mkIf cfg.enable { - boot.initrd.availableKernelModules = [ - "erofs" - "overlay" - "virtio_mmio" - "virtio_pci" - "virtio_blk" - # "9pnet_virtio" - # "9p" - "virtiofs" - ]; - boot.initrd.systemd.enable = lib.mkDefault true; - fileSystems = { - "/nix/store" = { - fsType = "overlay"; - overlay.lowerdir = map (img: "/nix/.ro-stores/${toString img.seq}") layers; - neededForBoot = true; - }; - } - // lib.listToAttrs ( - map ( - img: - lib.nameValuePair "/nix/.ro-stores/${toString img.seq}" { - device = "/dev/disk/by-label/${img.label}"; - neededForBoot = true; - options = [ "x-systemd.device-timeout=5" ]; - } - ) layers - ); - }) ]; } diff --git a/profiles/debug-closure.nix b/profiles/debug-closure.nix index d1772da..86137c0 100644 --- a/profiles/debug-closure.nix +++ b/profiles/debug-closure.nix @@ -15,15 +15,9 @@ let inherit (ps) writeErofsLayers; emptySystem = 
import (pkgs.path + "/nixos/lib/eval-config.nix") { modules = [ - (modulesPath + "/profiles/minimal.nix") + ./minimal.nix { system.stateVersion = config.system.stateVersion; - fileSystems."/".fsType = "tmpfs"; - boot.loader.grub.enable = false; - networking.hostName = "base"; - networking.nftables.enable = true; - networking.useNetworkd = true; - systemd.network.enable = true; } ]; }; diff --git a/profiles/minimal.nix b/profiles/minimal.nix new file mode 100644 index 0000000..1ac85c8 --- /dev/null +++ b/profiles/minimal.nix @@ -0,0 +1,36 @@ +{ + lib, + config, + modulesPath, + ... +}: +{ + imports = [ + (modulesPath + "/profiles/minimal.nix") + ]; + boot.loader.grub.enable = false; + boot.initrd.systemd.enable = true; + networking.useNetworkd = true; + networking.nftables.enable = lib.mkDefault (config.networking.firewall.enable || config.networking.nat.enable); # mkDefault: profiles/baseImage.nix sets this to true outright, which would otherwise conflict once the firewall is disabled + fileSystems."/".fsType = lib.mkDefault "tmpfs"; + networking.hostName = lib.mkDefault "base"; + + systemd.sysusers.enable = false; + services.userborn.enable = true; # nikstur it + + nix.enable = false; + services.logrotate.enable = false; + services.udisks2.enable = false; + system.tools.nixos-generate-config.enable = false; + systemd.coredump.enable = false; + powerManagement.enable = false; + boot.kexec.enable = false; + system.switch.enable = false; + services.resolved.enable = false; + + systemd.services.generate-shutdown-ramfs.enable = lib.mkForce false; + systemd.services.systemd-remount-fs.enable = lib.mkForce false; + systemd.services.systemd-pstore.enable = lib.mkForce false; + systemd.services.lastlog2-import.enable = lib.mkForce false; + # systemd.services.suid-sgid-wrappers.enable = lib.mkForce false; +} diff --git a/profiles/on-failure.nix b/profiles/on-failure.nix new file mode 100644 index 0000000..c5c256d --- /dev/null +++ b/profiles/on-failure.nix @@ -0,0 +1,72 @@ +{ + lib, + config, + pkgs, + ...
+}: +let + cfg = config.some.failure-handler; + jobScript = pkgs.writeShellScriptBin "show-status" '' + set -euo pipefail + + export PATH=${lib.getBin config.boot.initrd.systemd.package}/bin''${PATH:+:$PATH} + export PATH=${lib.getBin pkgs.util-linux}/bin''${PATH:+:$PATH} + export PATH=${lib.getBin pkgs.gnugrep}/bin''${PATH:+:$PATH} + + unit="$1" + shift + + systemctl status "$unit" >&2 || true + patterns=$unit$'\n'error + dmesg | grep -Fi "$patterns" || true + ''; + mkSystemdDropin = pkgs.callPackage ../pkgs/mkSystemdDropin.nix { }; # helper from this repository's pkgs/ directory; called below to build cfg.package +in +{ + options.some.failure-handler = { + enable = lib.mkEnableOption "Set up show-status@.service as a default OnFailure dependency"; # NOTE(review): the template installed below is status@.service; show-status is the script's name + stage-1.enable = + lib.mkEnableOption "Set up show-status@.service as a default OnFailure dependency in initramfs/initrd" + // { + default = cfg.enable; + }; + package = lib.mkOption { + type = lib.types.package; + readOnly = true; + description = "The internal package with the drop-ins"; + }; + }; + config = { + some.failure-handler.package = mkSystemdDropin { + name = "status-on-failure"; + inherit jobScript; + dropinText = '' + [Unit] + OnFailure=status@%n.service + ''; + serviceText = '' + [Unit] + DefaultDependencies=no + Description=Show status for %i + + [Service] + Type=oneshot + StandardOutput=journal+console + StandardError=journal+console + ExecStart=${lib.getExe jobScript} "%i" + JoinsNamespaceOf= + DelegateNamespaces= + ''; + extraCommands = '' + printf "%s" "$serviceText" > "$root/status@.service" + ''; + }; + boot.initrd.systemd.packages = lib.optionals cfg.stage-1.enable [ cfg.package ]; + boot.initrd.systemd.storePaths = lib.optionals cfg.stage-1.enable [ # paths the script's PATH setup refers to, beyond the initrd systemd package + jobScript + pkgs.util-linux + pkgs.gnugrep + ]; + systemd.packages = lib.optionals cfg.enable [ cfg.package ]; + }; +}