diff --git a/examples/dummy.nix b/examples/dummy.nix index 3e5ffd9..7d8c813 100644 --- a/examples/dummy.nix +++ b/examples/dummy.nix @@ -5,13 +5,9 @@ modulesPath, ... }: -let - uvmsPkgs = pkgs.callPackage ../pkgs { }; -in { imports = [ - ../profiles/ch-runner.nix - ../profiles/baseImage.nix + ../profiles/all.nix (modulesPath + "/profiles/minimal.nix") ]; @@ -22,32 +18,30 @@ in vmapps.enable = true; _module.args.inputs = import ../npins; - # boot.isContainer = true; + # following microvm.nix: boot.loader.grub.enable = false; boot.initrd.systemd.enable = true; - - boot.initrd.systemd.settings.Manager.DefaultTimeoutStartSec = 5; + fileSystems."/" = lib.mkDefault { + device = "rootfs"; # how does this work? does this assign a label to the tmpfs? + fsType = "tmpfs"; + options = [ "size=20%,mode=0755" ]; + neededForBoot = true; + }; + boot.initrd.systemd.settings.Manager.DefaultTimeoutStartSec = 30; systemd.settings.Manager.DefaultTimeoutStopSec = 10; + networking.useNetworkd = true; + networking.nftables.enable = true; uvms.cloud-hypervisor.enable = true; + users.mutableUsers = false; + users.users.root.password = "hacktheplanet!"; + services.getty.autologinUser = "root"; + systemd.services."suid-sgid-wrappers".serviceConfig = { StandardOutput = "journal+console"; StandardError = "journal+console"; }; - systemd.services."terminal" = { - wantedBy = [ "multi-user.target" ]; - wants = [ "wayland-proxy.service" ]; - after = [ "wayland-proxy.service" ]; - serviceConfig = { - User = "user"; - WorkingDirectory = "/home/user"; - ExecStart = lib.getExe pkgs.alacritty; - StandardOutput = "journal+console"; - StandardError = "journal+console"; - }; - }; - # TODO: cmdline, kernel, initrd, fileSystems } diff --git a/pkgs/baseImage.nix b/pkgs/baseImage.nix deleted file mode 100644 index 3e67ba9..0000000 --- a/pkgs/baseImage.nix +++ /dev/null @@ -1,3 +0,0 @@ -{ nixos }: - -nixos ../profiles/baseImage.nix diff --git a/pkgs/cloud-hypervisor-gpu.nix b/pkgs/cloud-hypervisor-gpu.nix deleted file mode 100644 index 325b372..0000000 --- a/pkgs/cloud-hypervisor-gpu.nix +++ /dev/null @@ -1,59 +0,0 @@ -{ - lib, - cloud-hypervisor, - fetchFromGitHub, - rustPlatform, - enableDebug ? true, -}: - -let - spectrum = builtins.fetchTree { - url = "https://spectrum-os.org/git/spectrum"; - type = "git"; - rev = "0f3388f0191d9a03c7bf471c269a34a79f22018b"; - }; -in -cloud-hypervisor.overrideAttrs ( - finalAttrs: oldAttrs: - { - # Verbatim from spectrum - postUnpack = oldAttrs.postUnpack or "" + '' - unpackFile $vhost - chmod -R +w vhost - ''; - vhost = fetchFromGitHub { - name = "vhost"; - owner = "rust-vmm"; - repo = "vhost"; - rev = "vhost-user-backend-v0.20.0"; - hash = "sha256-KK1+mwYQr7YkyGT9+51v7TJael9D0lle2JXfRoTqYq8="; - }; - - patches = oldAttrs.patches or [ ] ++ [ - "${spectrum}/pkgs/cloud-hypervisor/0001-build-use-local-vhost.patch" - "${spectrum}/pkgs/cloud-hypervisor/0002-virtio-devices-add-a-GPU-device.patch" - ]; - vhostPatches = builtins.concatMap ( - name: - lib.optionals (lib.hasSuffix ".patch" name) [ "${spectrum}/pkgs/cloud-hypervisor/vhost/${name}" ] - ) (builtins.attrNames (builtins.readDir "${spectrum}/pkgs/cloud-hypervisor/vhost")); - # Verbatim copy from spectrum - postPatch = oldAttrs.postPatch or "" + '' - pushd ../vhost - for patch in $vhostPatches; do - echo applying patch $patch - patch -p1 < $patch - done - popd - ''; - cargoDeps = rustPlatform.fetchCargoVendor { - inherit (finalAttrs) patches; - inherit (oldAttrs) src; - hash = "sha256-wGtsyKDg1z1QK9mJ1Q43NSjoPbm3m81p++DoD8ipIUI="; - }; - } - // lib.optionalAttrs enableDebug { - buildType = "debug"; - dontStrip = true; - } -) diff --git a/pkgs/default.nix b/pkgs/default.nix index ff3fa2e..6c710fa 100644 --- a/pkgs/default.nix +++ b/pkgs/default.nix @@ -4,14 +4,6 @@ let in lib.makeScope newScope ( self: - let - callPackage = - fun: overrides: - let - result = self.callPackage fun overrides; - in - result // { override = result.__originalOverride or result.override; }; - in dirToAttrs ./. [ ( @@ -22,9 +14,9 @@ lib.makeScope newScope ( ( name: fpath: typ: if typ == "regular" then - callPackage fpath { } + self.callPackage fpath { } else if typ == "directory" && builtins.pathExists (fpath + "/package.nix") then - callPackage (fpath + "/package.nix") { } + self.callPackage (fpath + "/package.nix") { } else null ) diff --git a/pkgs/linux-uvm.nix b/pkgs/linux-uvm.nix deleted file mode 100644 index bd11aef..0000000 --- a/pkgs/linux-uvm.nix +++ /dev/null @@ -1,91 +0,0 @@ -{ - lib, - linux_latest, -}: - -let - inherit (lib.kernel) yes no unset; - inherit (lib) mkForce; - result = linux_latest.override { - structuredExtraConfig = { - BASE_SMALL = yes; - DRM_VIRTIO_GPU = yes; - EROFS_FS = yes; - # TSI = yes; - DAX = yes; - FS_DAX = yes; - FUSE_DAX = yes; - OVERLAY_FS = yes; - VIRTIO_BALLOON = yes; - VIRTIO_BLK = yes; - VIRTIO_CONSOLE = yes; - VIRTIO_FS = yes; - VIRTIO_MMIO = yes; - VIRTIO_PCI = yes; - VIRTIO = yes; - FUSE_FS = yes; - VSOCKETS = yes; - NO_HZ_IDLE = mkForce yes; - NO_HZ_FULL = mkForce unset; - HZ_1000 = unset; - HZ_250 = yes; # NixOS default: 1000 - - # LSM = "lockdown,yama,loadpin,safesetid,integrity,bpf"; - - EXT4_FS = yes; - # EXT4_USE_FOR_EXT2 = yes; - XFS_FS = yes; - DEFAULT_SECURITY_APPARMOR = mkForce unset; - - XEN = mkForce unset; - XEN_BACKEND = mkForce unset; - XEN_BALLOON = mkForce unset; - XEN_BALLOON_MEMORY_HOTPLUG = mkForce unset; - XEN_DOM0 = mkForce unset; - XEN_HAVE_PVMMU = mkForce unset; - XEN_MCE_LOG = mkForce unset; - XEN_PVH = mkForce unset; - XEN_SAVE_RESTORE = mkForce unset; - XEN_SYS_HYPERVISOR = mkForce unset; - PCI_XEN = mkForce unset; - POWER_RESET_GPIO = mkForce unset; - POWER_RESET_GPIO_RESTART = mkForce unset; - RCU_LAZY = mkForce unset; - REISERFS_FS_POSIX_ACL = mkForce unset; - REISERFS_FS_SECURITY = mkForce unset; - REISERFS_FS_XATTR = mkForce unset; - SWIOTLB_XEN = mkForce unset; - SUSPEND = mkForce unset; - PM = mkForce unset; - HIBERNATION = mkForce unset; - ACPI = mkForce unset; - CPU_FREQ = mkForce unset; - CPU_FREQ_DT = mkForce unset; - INTEL_IDLE = mkForce unset; - ISA_DMA_API = mkForce unset; - IA32_EMULATION = mkForce unset; - COMPAT = mkForce unset; - COMPAT_32 = mkForce unset; - KVM = mkForce unset; - BLOCK_LEGACY_AUTOLOAD = mkForce unset; - SWAP = mkForce unset; - CMA = mkForce unset; - FB = mkForce unset; - FB_EFI = mkForce unset; - FB_VESA = mkForce unset; - SECURITY_APPARMOR = mkForce unset; - - VT = no; - DRM_FBDEV_EMULATION = lib.mkForce no; - FONTS = mkForce unset; - FONT_8x8 = mkForce unset; - FONT_TER16x32 = mkForce unset; - FRAMEBUFFER_CONSOLE = mkForce unset; - FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER = mkForce unset; - FRAMEBUFFER_CONSOLE_DETECT_PRIMARY = mkForce unset; - FRAMEBUFFER_CONSOLE_ROTATION = mkForce unset; - RC_CORE = mkForce unset; - }; - }; -in -result // { __originalOverride = result.override; } diff --git a/pkgs/mkSystemdDropin.nix b/pkgs/mkSystemdDropin.nix deleted file mode 100644 index 56407bc..0000000 --- a/pkgs/mkSystemdDropin.nix +++ /dev/null @@ -1,39 +0,0 @@ -{ - lib, - runCommand, - writeShellScriptBin, -}: -{ - name, - prefix ? "10-all-", - dirs ? [ - "service" - "mount" - "socket" - "timer" - "target" - ], - - dropinText ? null, - extraCommands ? "", - ... -}@args: - -runCommand "${name}-dropin" - ( - lib.removeAttrs args [ - "name" - ] - // { - inherit dirs dropinText extraCommands; - } - ) - '' - set -euo pipefail - root=$out/lib/systemd/system - for dir in $dirs ; do - mkdir -p "$root/$dir".d - printf "%s" "$dropinText" > "$root/$dir.d/${prefix}${name}.conf" - done - runHook extraCommands - '' diff --git a/pkgs/taps/package.nix b/pkgs/taps/package.nix index e396748..c666cd9 100644 --- a/pkgs/taps/package.nix +++ b/pkgs/taps/package.nix @@ -30,8 +30,6 @@ stdenv.mkDerivation { rustc ]; buildInputs = [ ch-proxy ]; - - meta.mainProgram = "taps"; } # { lib, rustPlatform }: # diff --git a/pkgs/uvms-guest/guest.py b/pkgs/uvms-guest/guest.py deleted file mode 100644 index 87b3d1a..0000000 --- a/pkgs/uvms-guest/guest.py +++ /dev/null @@ -1,76 +0,0 @@ -import json -import os -import select -import socket -import subprocess - - -def handle_run(run: dict) -> dict: - res = {} - text = run.get("text", False) - env = { - **os.environ, - "PATH": ":".join( - os.environ.get("PATH", "").split(":") + run.get("EXTRA_PATH", []) - ), - } - proc = None - try: - proc = subprocess.Popen( - req["run"]["argv"], - text=text, - env=env, - cwd="/home/user", - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - res["status"] = "exec succeeded" - except Exception as e: - res["status"] = "exec failed" - res["exception"] = repr(e) - res["pid"] = getattr(proc, "pid", None) - try: - if proc is not None: - proc.wait(0.125) - res["long_running"] = False - res["returncode"] = getattr(proc, "returncode", None) - except subprocess.TimeoutExpired: - res["long_running"] = True - return res, proc - - -if __name__ == "__main__": - serv = socket.fromfd(3, socket.AF_VSOCK, socket.SOCK_STREAM) - - procs = [] - conns = [serv] - - while True: - rr, rw, xs = select.select(conns, [], []) - - for con in rr: - if con is serv: - con, (cid, port) = serv.accept() - assert cid == 2, cid - conns.append(con) - continue - req = con.recv(8192) - # IDK why but I keep getting empty messages - if req == b"": - continue - try: - req = json.loads(req) - print(f"Received {req=}") - except json.JSONDecodeError as e: - print(f"Couldn't interpret {req=}: {e}") - continue - if "run" in req: - res, proc = handle_run(req["run"]) - procs.append(proc) - else: - res = {"status": "unknown command"} - _, rw, _ = select.select([], [con], []) - assert rw, rw - res = json.dumps(res).encode("utf8") - print(f"Responding with {res=}") - con.send(res) diff --git a/pkgs/uvms-guest/package.nix b/pkgs/uvms-guest/package.nix deleted file mode 100644 index 66cfa2d..0000000 --- a/pkgs/uvms-guest/package.nix +++ /dev/null @@ -1,5 +0,0 @@ -{ - lib, - writers, -}: -writers.writePython3Bin "uvms-guest" { } ./guest.py diff --git a/pkgs/uvms/package.nix b/pkgs/uvms/package.nix deleted file mode 100644 index 109235e..0000000 --- a/pkgs/uvms/package.nix +++ /dev/null @@ -1,56 +0,0 @@ -{ - lib, - symlinkJoin, - writers, - writeClosure, - replaceVars, - bubblewrap, - cloud-hypervisor-gpu, - crosvm, - effective-cloud-hypervisor ? cloud-hypervisor-gpu, - execline, - s6, - strace, - util-linux, - virtiofsd, - - taps, - baseImage, -}: - -let - tools = map lib.getBin [ - execline - s6 - effective-cloud-hypervisor - virtiofsd - bubblewrap - strace - crosvm - taps - util-linux - ]; - toolsFarm = symlinkJoin { - name = "tools"; - paths = tools; - }; - toolsClosure = writeClosure toolsFarm; -in -writers.writePython3Bin "uvms" { } ( - replaceVars ./uvms.py { - BWRAP = "${lib.getExe bubblewrap}"; - TOOLS = "${toolsFarm}/bin"; - TOOLS_CLOSURE = toolsClosure; - CROSVM = lib.getExe crosvm; - STRACE = lib.getExe strace; - TAPS = "${lib.getExe taps}"; - VIRTIOFSD = "${lib.getExe virtiofsd}"; - - BASE_CONFIG = baseImage.config.system.build.ch; - SYSTEM = baseImage.config.system.build.toplevel; - SYSTEM_CLOSURE = writeClosure [ - baseImage.config.system.build.toplevel - baseImage.config.system.build.ch - ]; - } -) diff --git a/pkgs/uvms/uvms.py b/pkgs/uvms/uvms.py deleted file mode 100644 index ab71c6e..0000000 --- a/pkgs/uvms/uvms.py +++ /dev/null @@ -1,665 +0,0 @@ -# NOTE: This would have been bash, -# and this was execlineb previously, -# but it was just easier to reason in terms of context managers -# and try-except-finally branches for the cleanup bit, -# than in terms of traps or such. -# Treat this as bash. -# Treat this as throwaway shitcode. - -import os -import subprocess -import socket -import json -from argparse import ArgumentParser -from contextlib import contextmanager, closing, ExitStack - - -parser = ArgumentParser("supervise-vm") -parser.add_argument("--vm", default=None) -parser.add_argument("--prefix", default="$HOME/uvms/$VM") -parser.add_argument("--vm-config", default="@BASE_CONFIG@") # noqa: E501 -parser.add_argument("--persist-home", action="store_true") -parser.add_argument("--run", action="append") -parser.add_argument("app", nargs="*", default=()) - -TOOLS_DIR = "@TOOLS@" # noqa: E501 -SOCKETBINDER = TOOLS_DIR + "/s6-ipcserver-socketbinder" # noqa: E501 -CH = TOOLS_DIR + "/cloud-hypervisor" -CHR = TOOLS_DIR + "/ch-remote" -TAPS = "@TAPS@" # noqa: E501 -VIRTIOFSD = "@VIRTIOFSD@" # noqa: E501 -BWRAP = "@BWRAP@" # noqa: E501 - -with open("@TOOLS_CLOSURE@", mode="r") as f: # noqa: E501 - TOOLS_CLOSURE = [ - *(ln.rstrip() for ln in f.readlines()), - os.path.dirname(__file__), - ] - -BASE_SYSTEM = "@SYSTEM@" # noqa: E501 -with open("@SYSTEM_CLOSURE@", mode="r") as f: # noqa: E501 - BASE_SYSTEM_CLOSURE = [ - *(ln.rstrip() for ln in f.readlines()), - ] - -PASSTHRU_PATH = ":".join([TOOLS_DIR, *os.environ.get("PATH", "").split(":")]) -PASSTHRU_ENV = { - **{ - k: v - for k, v in os.environ.items() - if k.startswith("RUST_") - or k.startswith("WAYLAND") - or k.startswith("XDG_") - or k.startswith("DBUS_") - or k.startswith("NIX_") - or k - in [ - "TAPS_SOCK", - ] - }, - "HOME": os.environ.get("HOME", os.getcwd()), - "PATH": PASSTHRU_PATH, -} - - -def preprocess_args(args_mut): - if not args_mut.app and args_mut.run: - args_mut.app = [*args_mut.run] - if not args_mut.vm: - args_mut.vm = args_mut.run[0] - keys = [k for k, v in args_mut._get_kwargs() if isinstance(v, str)] - for k in keys: - v = getattr(args_mut, k) - if "$HOME" in v: - setattr(args_mut, k, v.replace("$HOME", PASSTHRU_ENV["HOME"])) - for k in keys: - v = getattr(args_mut, k) - if "$VM" in v: - setattr(args_mut, k, v.replace("$VM", args.vm)) - for k in keys: - v = getattr(args_mut, k) - if "$PREFIX" in v: - setattr(args_mut, k, v.replace("$PREFIX", args.prefix)) - return args_mut - - -def alive_after(proc, timeout): - if proc is None: - return False - if proc.returncode is not None: - return False - try: - proc.wait(timeout) - except subprocess.TimeoutExpired: - return True - return False - - -class Processes: - def __init__(self, prefix, vm, check=True, **defaults): - self.prefix = prefix - self.vm = vm - self.check = check - self.defaults = defaults - self.processes = [] - - def make_env(self): - return { - **PASSTHRU_ENV, - "PATH": PASSTHRU_PATH, - "PREFIX": self.prefix, - "VM": self.vm, - } - - def exec(self, *args, **kwargs): - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - kwargs["check"] = kwargs.get("check", self.check) - kwargs["env"] = kwargs.get("env", self.make_env()) - return subprocess.run([*args], **self.defaults, **kwargs) - - def execline(self, *args, **kwargs): - return exec( - "execlineb", - "-c", - "\n".join(args), - **self.defaults, - executable=TOOLS_DIR + "/execlineb", - **{ - "env": self.make_env(), - "check": self.check, - "cwd": self.prefix, - **kwargs, - }, - ) - - @contextmanager - def popen(self, *args, **kwargs): - kwargs["pass_fds"] = kwargs.get("pass_fds", ()) - kwargs["env"] = kwargs.get("env", self.make_env()) - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - kwargs["text"] = kwargs.get("text", True) - kwargs["stdin"] = kwargs.get("stdin", subprocess.DEVNULL) - kwargs["stdout"] = kwargs.get("stdout", subprocess.DEVNULL) - kwargs["stderr"] = kwargs.get("stderr", subprocess.DEVNULL) - proc = None - try: - proc = subprocess.Popen( - args, - **kwargs, - ) - if not alive_after(proc, 0.125): - raise RuntimeError("Failed to start", args) - print(f"Started {args}") - self.processes.append(proc) - yield proc - print(f"Releasing {args}") - finally: - if subprocess.PIPE in (kwargs["stderr"], kwargs["stdout"]): - print(proc.communicate()) - while alive_after(proc, 0.125): - try: - proc.terminate() - proc.wait() - except Exception as e: - print(f"Cleanup failing: {e}") - - @contextmanager - def bwrap( - self, - *bwrap_args, - die_with_parent=True, - # Based on the args from - # `host/rootfs/image/usr/bin/run-vmm` - unshare_all=True, - uid=1000, - gid=100, - unshare_user=True, - unshare_ipc=None, - unshare_pid=None, - unshare_net=None, - unshare_uts=None, - unshare_cgroup_try=True, - bind=(), - dev_bind=(), - dev_bind_implicit=("/dev/kvm", "/dev/vfio"), - dev="/dev", - proc="/proc", - ro_bind_implicit=( - "/etc", - "/sys", - "/proc/sys", - "/dev/null", - "/proc/kallsyms", - *sorted(set([*TOOLS_CLOSURE, *BASE_SYSTEM_CLOSURE])), - ), - ro_bind=(), - remount_ro=("/proc/fs", "/proc/irq"), - tmpfs_implicit=( - "/dev/shm", - "/tmp", - "/var/tmp", - "/proc/fs", - "/proc/irq", - ), - tmpfs=(), - pass_fds=(2,), - **popen_kwargs, - ): - - bwrap_args_sock, remote = socket.socketpair() - remote.set_inheritable(True) - bwrap_args_f = bwrap_args_sock.makefile("w") - - def print_arg(*args): - print(*args, file=bwrap_args_f, sep="\0", end="\0") - - if unshare_all: - print_arg("--unshare-all") - if unshare_user: - print_arg("--unshare-user") - if uid is not None: - assert unshare_user - print_arg("--uid", uid) - if gid is not None: - assert unshare_user - print_arg("--gid", gid) - if unshare_ipc: - print_arg("--unshare-ipc") - if unshare_pid: - print_arg("--unshare-pid") - if unshare_net: - print_arg("--unshare-net") - elif unshare_net is False: - print_arg("--share-net") - if unshare_uts: - print_arg("--unshare-uts") - if unshare_cgroup_try: - print_arg("--unshare-cgroup-try") - if die_with_parent: - print_arg("--die-with-parent") - if dev: - print_arg("--dev", dev) - if proc: - print_arg("--proc", proc) - - for p in bind: - assert isinstance(p, (str, tuple)), p - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--bind", p1, p2) - for p in (*ro_bind, *ro_bind_implicit): - assert isinstance(p, (str, tuple)), p - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--ro-bind", p1, p2) - for p in (*dev_bind, *dev_bind_implicit): - assert isinstance(p, (str, tuple)), p - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--dev-bind", p1, p2) - for p in (*tmpfs, *tmpfs_implicit): - print_arg("--tmpfs", p) - # Hunch: order might matter... - for p in remount_ro: - print_arg("--remount-ro", p) - - bwrap_args_f.flush() - - try: - with ExitStack() as proc_es: - with ExitStack() as es: - es.enter_context(closing(remote)) - es.enter_context(closing(bwrap_args_sock)) - es.enter_context(closing(bwrap_args_f)) - proc = proc_es.enter_context( - self.popen( - "bwrap", - "--args", - str(remote.fileno()), - *bwrap_args, - **popen_kwargs, - executable=BWRAP, - pass_fds=(*pass_fds, remote.fileno()), - ) - ) - yield proc - finally: - assert proc.returncode is not None, proc - - @contextmanager - def run_ch(self): - # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0) - # s.set_inheritable(True) - # s.setblocking(True) - # s.bind(self.prefix + "/vmm.sock") - args = [ - SOCKETBINDER, - "-B", - self.prefix + "/vmm.sock", - # "@STRACE@", # noqa: E501 - # "-Z", - # "-ff", - CH, - "--api-socket", - "fd=0", - # f"fd={s.fileno()}" - ] - cleanup_paths = [ - self.prefix + "/vmm.sock", - self.prefix + "/vmm.sock.lock", - self.prefix + "/vsock.sock", - ] - new_paths = [p for p in cleanup_paths if not os.path.exists(p)] - old_paths = [p for p in cleanup_paths if p not in new_paths] - with ExitStack() as cleanup: - cleanup.enter_context(removing(*new_paths)) - proc = cleanup.enter_context( - self.bwrap( - *args, - bind=[self.prefix], - # Probably just need the path to vmlinux - # ro_bind=["/nix/store"], # I give up - unshare_net=False, - shell=False, - # pass_fds=(s.fileno(),) - ) - ) - # s.close() - cleanup.enter_context(removing(*old_paths)) - assert alive_after(proc, 1.0), proc - if not os.path.exists(self.prefix + "/vmm.sock"): - raise RuntimeError( - f"{self.prefix}/vmm.sock should exist by now", - ) - if proc.returncode is not None: - raise RuntimeError("CH exited early") - yield proc - - @contextmanager - def start_gpu( - self, - ): - sock_path = self.prefix + "/gpu.sock" - args = [ - SOCKETBINDER, - "-b", - "1", - sock_path, - "s6-ipcserverd", - "-1c1", - # "@STRACE@", # noqa: E501 - # "-Z", - # "-ff", - "@CROSVM@", # noqa: E501 - "--no-syslog", - "device", - "gpu", - "--fd", - "0", - "--wayland-sock", - f'{PASSTHRU_ENV["XDG_RUNTIME_DIR"]}/{PASSTHRU_ENV["WAYLAND_DISPLAY"]}', # noqa: E501 - "--params", - '{ "context-types": "cross-domain:virgl2:venus" }', - ] - with self.popen( - *args, - stderr=None, - ) as proc, removing(sock_path, sock_path + ".lock"): - yield proc, sock_path - - @contextmanager - def start_virtiofsd( - self, - root_dir, - tag, - ro=True, - subdirs=None, - extra_flags=("--posix-acl", "--xattr"), - ): - - assert os.path.exists(root_dir) - - sock_path = self.prefix + f"/virtiofsd-{tag}.sock" - # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - # NOTE: Nope. Virtiofsd actually expects a blocking socket - # s.setblocking(True) - # s.set_inheritable(True) - - with ExitStack() as cleanup: # noqa: F841 - # s.bind(sock_path.encode("utf8")) - # cleanup.enter_context(closing(s)) - cleanup.enter_context(removing(sock_path, sock_path + ".pid")) - - args = [ - # If using bwrap(): - # "--argv0", "virtiofsd", - # "--uid", "1000", - # "--gid", "100", - # "--", - "unshare", - "-rUm", - "unshare", - "--map-user", - "1000", - "--map-group", - "100", - VIRTIOFSD, - "--shared-dir", - root_dir, - "--tag", - tag, - # "--fd", - # str(s.fileno()), - "--socket-path", - sock_path, - # If relying on bwrap(): - # "--sandbox", - # "none", - ] - if ro: - args.append("--readonly") - kwargs = { - # If bwrap(): - # "bind": [], - # ("ro_bind" if ro else "bind"): - # [*subdirs] - # if subdirs is not None - # else [root_dir], - # "pass_fds": (2, s.fileno()), - "stdout": subprocess.PIPE, - "stderr": subprocess.PIPE, - } - try: - with self.popen(*args, **kwargs) as p: - yield p, sock_path - finally: - if os.path.exists(sock_path): - os.remove(sock_path) - - -@contextmanager -def defer(f): - try: - yield - finally: - f() - - -@contextmanager -def removing(*paths): - try: - yield - finally: - for p in paths: - if os.path.exists(p): - os.remove(p) - - -def connect_ch_vsock( - vsock_sock_path, - port: int, - type=socket.SOCK_STREAM, - blocking=True, -) -> socket.socket: - s = socket.socket(socket.AF_UNIX, type, 0) - s.setblocking(blocking) - s.connect(vsock_sock_path) - - s.send(b"CONNECT %d\n" % port) - return s - - -@contextmanager -def listen_ch_vsock( - vsock_sock_path, - port: int, - type=socket.SOCK_STREAM, - blocking=True, -) -> socket.socket: - listen_path = vsock_sock_path + "_%d" % port - s = socket.socket(socket.AF_UNIX, type, 0) - s.setblocking(blocking) - s.bind(listen_path) - s.listen() - try: - yield s - finally: - os.remove(listen_path) - - -def main(args, args_next, cleanup, ps): - send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}" - - os.makedirs(send_dir, exist_ok=True) - os.makedirs(args.prefix, exist_ok=True) - - ch_remote = [ - "ch-remote", - "--api-socket", - args.prefix + "/vmm.sock", - ] - - with open(args.vm_config) as f: - config = json.load(f) - - app_paths = [] - for a in args.app: - out_path = ps.exec( - "nix-build", - "", - "-A", - a, - "--no-out-link", - capture_output=True, - text=True, - ).stdout.strip() - assert out_path.startswith("/nix/store/") - app_paths.append(out_path) - apps_closure = ps.exec( # noqa: F841 - "nix-store", - "-qR", - *app_paths, - capture_output=True, - text=True, - ).stdout.split() - - ready_sock = cleanup.enter_context( - listen_ch_vsock(ps.prefix + "/vsock.sock", 8888), - ) - - virtiofs_socks = [] - _, sock_path = cleanup.enter_context( - ps.start_virtiofsd( - send_dir, - tag="send", - ro=False, - ) - ) - virtiofs_socks.append(("send", sock_path)) - _, sock_path = cleanup.enter_context( - ps.start_virtiofsd( - "/nix/store", - subdirs=apps_closure, - tag="apps", - ) - ) - virtiofs_socks.append(("apps", sock_path)) - _, sock_path = cleanup.enter_context( - ps.start_virtiofsd( - "/nix/store", - subdirs=BASE_SYSTEM_CLOSURE, - tag="system", - ) - ) - virtiofs_socks.append(("system", sock_path)) - - if args.persist_home: - os.makedirs(args.prefix + "/home", exist_ok=True) - _, sock_path = cleanup.enter_context( - ps.start_virtiofsd( - args.prefix + "/home", - subdirs=BASE_SYSTEM_CLOSURE, - tag="home", - ro=False, - ) - ) - virtiofs_socks.append(("home", sock_path)) - config["payload"]["cmdline"] += " uvms.persist-home=1" - - gpud, gpud_path = cleanup.enter_context(ps.start_gpu()) - - ch = cleanup.enter_context(ps.run_ch()) - - ps.exec( - *ch_remote, - "create", - input=json.dumps(config), - text=True, - ) - ps.exec( - TAPS, - "pass", - *ch_remote, - "add-net", - "id=wan,fd=3,mac=00:00:00:00:00:01", - ) - - # TODO: add-fs apps closure separately - for tag, sock_path in virtiofs_socks: - ps.exec(*ch_remote, "add-fs", f"tag={tag},socket={sock_path},id={tag}") - ps.exec(*ch_remote, "add-gpu", f"socket={gpud_path}") - ps.exec(*ch_remote, "boot") - ps.exec(*ch_remote, "info") - - with ready_sock: - ready_sock.settimeout(16.0) - try: - con, _ = ready_sock.accept() - except: # noqa: E722 - print( - "CH didn't try connecting to the readiness notification socket" - ) # noqa: E501 - else: - with con: - msg = con.recv(128) - assert msg.startswith(b"READY=1"), msg - - with connect_ch_vsock(ps.prefix + "/vsock.sock", 24601) as guest: - for r in args.run: - try: - guest.send( - json.dumps( - { - "run": { - "argv": [r], - "EXTRA_PATH": [ - f"{a}/bin" for a in app_paths - ], # noqa: E501 - } - } - ).encode("utf8") - ) - res = guest.recv(8192) - try: - res = json.loads(guest.recv(8192)) - except json.JSONDecodeError as e: - print(f"Couldn't interpret --run {r} response: {e} {res}") - continue - adverb = ( - "Successfully" - if res["status"] == "exec succeeded" - else "Failed to" # noqa: E501 - ) - print(f"{adverb} --run {r}: {res}") - except Exception as e: - print(f"Couldn't --run {r}: {repr(e)}") - try: - ch.wait() - except KeyboardInterrupt: - pass - - -if __name__ == "__main__": - args, args_next = parser.parse_known_args() - preprocess_args(args) - ps = Processes( - prefix=args.prefix, - vm=args.vm, - ) - - try: - with ExitStack() as cleanup: - main(args, args_next, cleanup, ps) - finally: - for p in ps.processes: - if p.returncode is not None: - continue - try: - print(f"Cleanup failed. Re-trying the killing of {p}") - p.terminate() - except: # noqa: E722 - pass - for p in ps.processes: - if p.returncode is not None: - continue - try: - p.wait() - except: # noqa: E722 - pass diff --git a/profiles/baseImage.nix b/profiles/baseImage.nix deleted file mode 100644 index 87f8df5..0000000 --- a/profiles/baseImage.nix +++ /dev/null @@ -1,407 +0,0 @@ -{ - lib, - config, - modulesPath, - pkgs, - ... -}: -let - inherit (lib) mkOption types concatStringsSep; - jsonType = (pkgs.formats.json { }).type; - - inherit (config.system.build) initialRamdisk; - inherit (config.system.boot.loader) initrdFile; - inherit (config.boot.kernelPackages) kernel; - kernelTarget = pkgs.stdenv.hostPlatform.linux-kernel.target; - uvmsPkgs = pkgs.callPackage ../pkgs { }; - waylandSock = "/run/user/1000/wayland-1"; - env = { - XDG_RUNTIME_DIR = "/run/user/1000"; - WAYLAND_DISPLAY = "wayland-1"; - - # MESA_LOADER_DRIVER_OVERRIDE = "zink"; - - ELECTRON_OZONE_PLATFORM_HINT = "wayland"; - MOZ_ENABLE_WAYLAND = "1"; - QT_QPA_PLATFORM = "wayland"; # Qt Applications - GDK_BACKEND = "wayland"; # GTK Applications - XDG_SESSION_TYPE = "wayland"; # Electron Applications - SDL_VIDEODRIVER = "wayland"; - CLUTTER_BACKEND = "wayland"; - NIXOS_OZONE_WL = "1"; - }; -in -{ - imports = [ - (modulesPath + "/profiles/minimal.nix") - ./debug-closure.nix - ./minimal.nix - ./on-failure.nix - ]; - config = { - some.failure-handler.enable = true; - hardware.graphics.enable = true; - # boot.kernelPackages = pkgs.linuxPackagesFor uvmsPkgs.linux-uvm; - # boot.isContainer = true; - boot.initrd.kernelModules = [ - "drm" - "virtio_blk" - "virtiofs" - "virtio_gpu" - "virtio_mmio" - "virtio_pci" - "overlay" - ]; - boot.kernelModules = [ - "drm" - "erofs" - "overlay" - "virtio_blk" - "virtiofs" - "virtio_gpu" - "virtio_mmio" - "virtio_pci" - ]; - boot.initrd.systemd.initrdBin = [ - pkgs.fuse - pkgs.fuse3 - ]; - fileSystems = { - "/" = lib.mkDefault { - device = "rootfs"; # how does this work? does this assign a label to the tmpfs? - fsType = "tmpfs"; - options = [ "size=20%,mode=0755" ]; - neededForBoot = true; - }; - "/nix/store" = { - fsType = "overlay"; - overlay.lowerdir = [ - "/nix/.ro-stores/system" - "/nix/.ro-stores/apps" - ]; - neededForBoot = true; - }; - "/nix/.ro-stores/system" = { - device = "system"; - fsType = "virtiofs"; - options = [ - "defaults" - "ro" - "x-systemd.requires=systemd-modules-load.service" - ]; - neededForBoot = true; - }; - "/nix/.ro-stores/apps" = { - device = "apps"; - fsType = "virtiofs"; - options = [ - "defaults" - "ro" - "x-systemd.requires=systemd-modules-load.service" - ]; - neededForBoot = true; - }; - }; - - systemd.mounts = [ - { - type = "virtiofs"; - where = "/home/user"; - what = "home"; - after = [ "systemd-modules-load.service" ]; - wantedBy = [ "local-fs.target" ]; - before = [ "local-fs.target" ]; - requires = [ "systemd-modules-load.service" ]; - options = lib.concatStringsSep "," [ - "defaults" - "rw" - "X-mount.owner=1000" - "X-mount.group=100" - ]; - unitConfig = { - ConditionKernelCommandLine = "uvms.persist-home=1"; - }; - } - { - type = "virtiofs"; - where = "/home/user/send"; - what = "send"; - wants = [ - "home-user.mount" - "-.mount" - ]; - after = [ - "systemd-modules-load.service" - "home-user.mount" - "-.mount" - ]; - wantedBy = [ "local-fs.target" ]; - before = [ "local-fs.target" ]; - options = lib.concatStringsSep "," [ - "defaults" - "rw" - "X-mount.owner=1000" - "X-mount.group=100" - ]; - unitConfig = { - DefaultDependencies = false; - }; - } - ]; - # systemd.services."mount-home-user-send" = { - # wants = [ "home-user.mount" ]; - # after = [ - # "systemd-modules-load.service" - # "home-user.mount" - # "-.mount" - # ]; - # wantedBy = [ "local-fs.target" ]; - # before = [ "local-fs.target" ]; - # unitConfig = { - # DefaultDependencies = false; - # }; - # environment.PATH = lib.mkForce ( - # lib.makeBinPath [ - # pkgs.fuse - # pkgs.fuse3 - # pkgs.coreutils - # ] - # ); - # serviceConfig = { - # Type = "oneshot"; - # RemainsAfterExit = true; - # ExecStart = [ - # "/run/current-system/sw/bin/mkdir -p /home/user/send" - # "/run/current-system/sw/bin/chown user /home/user/send" - # "/run/current-system/sw/sbin/mount -t virtiofs -o defaults,rw send /home/user/send" - # ]; - # StandardOutput = "journal+console"; - # StandardError = "journal+console"; - # }; - # }; - - systemd.network.enable = true; - networking.useNetworkd = true; - networking.nftables.enable = true; - networking.useDHCP = true; - networking.nameservers = [ "1.1.1.1" ]; - services.resolved.enable = lib.mkForce true; - - system.activationScripts.specialfs = lib.mkForce ""; - # networking.firewall.enable = false; - console.enable = false; - services.udev.packages = lib.mkDefault [ ]; - systemd.services."systemd-oomd".enable = false; - - users.mutableUsers = false; - users.users.root.password = "hacktheplanet!"; - users.groups.users = { }; - users.users.user = { - uid = 1000; - isNormalUser = true; - password = "hacktheplanet!"; - extraGroups = [ - "video" - "render" - "users" - "wheel" - ]; - }; - - environment.variables = env; - systemd.globalEnvironment = env; - - systemd.tmpfiles.settings."10-xdg" = { - ${env.XDG_RUNTIME_DIR}.d = { - user = "user"; - group = "users"; - mode = "0755"; - }; - }; - - systemd.sockets."wayland-proxy" = { - listenStreams = [ - waylandSock - ]; - socketConfig = { - SocketUser = "user"; - SocketGroup = "users"; - FileDescriptorName = "wayland"; - }; - wantedBy = [ "sockets.target" ]; - partOf = [ "wayland-proxy.service" ]; - }; - systemd.services."wayland-proxy" = { - wantedBy = [ "default.target" ]; - serviceConfig = { - User = "user"; - Group = "users"; - ExecStart = "${lib.getExe pkgs.wayland-proxy-virtwl} --virtio-gpu"; - # ExecStart = "${lib.getExe uvmsPkgs.wl-cross-domain-proxy} --listen-fd --filter-global wp_presentation"; - ExecStartPre = [ - "+/run/current-system/sw/bin/chmod 0666 /dev/dri/card0 /dev/dri/renderD128" - ]; - StandardOutput = "journal+console"; - StandardError = "journal+console"; - Restart = "on-failure"; - RestartSec = 5; - }; - }; - - systemd.sockets."uvms-guest" = { - wantedBy = [ "default.target" ]; - listenStreams = [ - "vsock::24601" - ]; - partOf = [ "uvms-guest.service" ]; - }; - systemd.services."uvms-guest" = { - serviceConfig = { - User = "user"; - Group = "users"; - ExecStart = "${lib.getExe uvmsPkgs.uvms-guest}"; - StandardOutput = "journal+console"; - StandardError = "journal+console"; - Restart = "on-failure"; - RestartSec = 5; - }; - }; - - fonts.enableDefaultPackages = true; - - boot.kernelParams = [ - "earlyprintk=ttyS0" - "console=ttyS0" - "reboot=t" - "panic=-1" - "io.systemd.credential:vmm.notify_socket=vsock-stream:2:8888" - # "rootfstype=virtiofs" - # "root=rootstore" - ]; - }; - - options = { - system.build.ch = mkOption { - type = types.package; - default = (pkgs.formats.json { }).generate "vm.json" config.uvms.ch.settings; - }; - uvms.ch.settings = mkOption { - default = { }; - type = types.submodule { - freeformType = jsonType; - options = { - payload = { - cmdline = mkOption { - type = types.str; - default = concatStringsSep " " ( - config.boot.kernelParams - ++ [ - # "init=${lib.removePrefix "/nix/store" "${config.system.build.toplevel}"}/init" - "init=${config.system.build.toplevel}/init" - ] - ); - defaultText = ''concatStringsSep " " ${config.boot.kernelParams}''; - }; - kernel = mkOption { - type = types.str; - default = "${kernel}/${kernelTarget}"; - }; - initramfs = mkOption { - type = types.nullOr types.str; - default = "${initialRamdisk}/${initrdFile}"; - }; - }; - vsock = { - cid = mkOption { - type = types.int; - default = 4; - }; - socket = mkOption { - type = types.str; - default = "vsock.sock"; - }; - }; - "api-socket" = mkOption { - type = types.str; - default = "vmm.sock"; - }; - "serial".mode = mkOption { - type = types.str; - default = "File"; - }; - "serial".file = mkOption { - type = types.nullOr types.str; - default = "serial"; - }; - "console".mode = mkOption { - type = types.str; - default = "Pty"; - }; - "console".file = mkOption { - type = types.nullOr types.str; - default = null; - }; - # "watchdog" = true; - # "seccomp" = true; - disks = mkOption { - default = [ ]; - type = types.listOf ( - types.submodule { - freeformType = jsonType; - options = { - path = mkOption { - type = types.oneOf [ - types.path - types.str - ]; - }; - readonly = mkOption { - type = types.bool; - default = true; - }; - id = mkOption { type = types.str; }; - }; - } - ); - }; - memory = mkOption { - default = { }; - type = types.submodule { - freeformType = jsonType; - options = { - size = mkOption { - type = types.int; - default = 1536 * 1048576; - }; - shared = mkOption { - type = types.bool; - default = true; - }; - mergeable = mkOption { - type = types.bool; - default = true; - }; - }; - }; - }; - cpus = mkOption { - default = { }; - type = types.submodule { - freeformType = jsonType; - options = { - boot_vcpus = mkOption { - type = types.int; - default = 4; - }; - max_vcpus = mkOption { - type = types.int; - default = 4; - }; - }; - }; - }; - }; - }; - }; - }; -} diff --git a/profiles/ch-runner.nix b/profiles/ch-runner.nix index ef32247..f156705 100644 --- a/profiles/ch-runner.nix +++ b/profiles/ch-runner.nix @@ -9,7 +9,7 @@ # but we shall begin by reproducing at least some of their work. let - cfg = config.uvms.ch; + cfg = config.uvms.cloud-hypervisor; inherit (config.networking) hostName; inherit (config.debug.closure.erofs) layers; @@ -22,7 +22,13 @@ let getBin ; - package = uvmsPkgs.cloud-hypervisor-gpu; + package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: { + patches = oldAttrs.patches or [ ] ++ [ + # ../patches/ch.patch + ]; + buildType = "debug"; + dontStrip = true; + }); uvmsPkgs = pkgs.callPackage ../pkgs { }; chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings; @@ -48,21 +54,69 @@ let in { options = { - uvms.ch.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)"; - uvms.ch.runner = mkOption { + uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)"; + uvms.cloud-hypervisor.runner = mkOption { type = types.package; description = "A naive script for running this system in cloud-hypervisor"; }; - uvms.ch.debugger = mkOption { + uvms.cloud-hypervisor.debugger = mkOption { type = types.lazyAttrsOf types.anything; description = "Same but you can debug the kernel"; }; - uvms.ch.settingsFile = mkOption { + uvms.cloud-hypervisor.settingsFile = mkOption { type = types.package; default = chSettingsFile; defaultText = "..."; readOnly = true; }; + uvms.cloud-hypervisor.settings = mkOption { + default = { }; + type = types.submodule { + freeformType = (pkgs.formats.json { }).type; + options = { + payload = { + cmdline = mkOption { type = types.str; }; + kernel = mkOption { type = types.str; }; + initramfs = mkOption { + type = types.str; + default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}"; + }; + }; + vsock = { + cid = mkOption { + type = types.int; + default = 4; + }; + socket = mkOption { + type = types.str; + default = "vsock.sock"; + }; + }; + "api-socket" = mkOption { + type = types.str; + default = "vmm.sock"; + }; + "serial".mode = mkOption { + type = types.str; + default = "File"; + }; + "serial".file = mkOption { + type = types.nullOr types.str; + default = "serial"; + }; + "console".mode = mkOption { + type = types.str; + default = "Pty"; + }; + "console".file = mkOption { + type = types.nullOr types.str; + default = null; + }; + # "watchdog" = true; + # "seccomp" = true; + }; + }; + }; uvms.cloud-hypervisor.extraCmdline = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; @@ -70,24 +124,43 @@ in uvms.cloud-hypervisor.cmdline = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ + "earlyprintk=ttyS0" + "console=ttyS0" + "reboot=t" + "panic=-1" + "init=${config.system.build.toplevel}/init" ] ++ config.boot.kernelParams ++ config.uvms.cloud-hypervisor.extraCmdline; }; }; - imports = [ ./baseImage.nix ]; config = lib.mkMerge [ { - # boot.kernelPackages = pkgs.linuxPackagesFor (uvmsPkgs.linux-uvm); - uvms.ch.settings = { + uvms.cloud-hypervisor.settings = { + payload = { + cmdline = lib.concatStringsSep " " cfg.cmdline; + kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}"; + }; + disks = map (img: { + path = img; + readonly = true; + id = toString img.label; + }) layers; memory = { + size = 1536 * 1048576; + shared = true; + mergeable = true; # hotplugged_size = 512 * 1048576; # hotplugd_size = 1536 * 1048576; # hotplug_method = "virtio-mem" }; + cpus = { + boot_vcpus = 4; + max_vcpus = 4; + }; }; - uvms.ch.debugger = pkgs.testers.runNixOSTest ( + uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest ( { config, ... }: { name = "test-run-${hostName}"; @@ -197,9 +270,435 @@ in ); # NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum - uvms.ch.runner = writeElb "run-${hostName}" '' - ${lib.getExe uvmsPkgs.uvms} --vm-config=${chSettingsFile} --vm=${hostName} - ''; + uvms.cloud-hypervisor.runner = + let + toolsClosure = pkgs.writeClosure [ + (lib.getBin pkgs.execline) + (lib.getBin pkgs.s6) + (lib.getBin package) + (lib.getBin pkgs.virtiofsd) + (lib.getBin pkgs.bubblewrap) + uvmsPkgs.taps + ]; + + superviseVm = getExe superviseVm'; + superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } '' + import os + import subprocess + import socket + from argparse import ArgumentParser + from contextlib import contextmanager, closing, ExitStack + + + parser = ArgumentParser("supervise-vm") + parser.add_argument("--vm") + parser.add_argument("--prefix", default="$HOME/uvms/$VM") + parser.add_argument("--sock", default="$PREFIX/supervisor.sock") + parser.add_argument("--vm-config") + + MSG_SIZE = 16 + ELB_DIR = "${lib.getBin pkgs.execline}/bin" # noqa: E501 + S6_DIR = "${lib.getBin pkgs.s6}/bin" # noqa: E501 + CH_DIR = "${lib.getBin package}/bin" # noqa: E501 + UTIL_LINUX_DIR = "${lib.getBin pkgs.util-linux}/bin" # noqa: E501 + SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder" # noqa: E501 + CH_PATH = CH_DIR + "/cloud-hypervisor" + CHR_PATH = CH_DIR + "/ch-remote" + TAPS_PATH = "${lib.getExe uvmsPkgs.taps}" # noqa: E501 + VIRTIOFSD_PATH = "${lib.getExe pkgs.virtiofsd}" # noqa: E501 + BWRAP_PATH = "${lib.getExe pkgs.bubblewrap}" # noqa: E501 + + with open("${toolsClosure}", mode="r") as f: # noqa: E501 + CLOSURE = [ + *(ln.rstrip() for ln in f.readlines()), + "${placeholder "out"}", # noqa: E501 + ] + + PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR, UTIL_LINUX_DIR]) + PASSTHRU_ENV = { + **{ + k: v + for k, v in os.environ.items() + if k.startswith("RUST") + or k.startswith("WAYLAND") + or k in [ + "TAPS_SOCK", + ] + }, + "HOME": os.environ.get("HOME", os.getcwd()), + "PATH": PASSTHRU_PATH, + } + + + def preprocess_args(args_mut): + keys = [ + k + for k, v + in args_mut._get_kwargs() + if isinstance(v, str)] + for k in keys: + v = getattr(args_mut, k) + if "$HOME" in v: + setattr( + args_mut, + k, + v.replace("$HOME", PASSTHRU_ENV["HOME"])) + for k in keys: + v = getattr(args_mut, k) + if "$VM" in v: + setattr(args_mut, k, v.replace("$VM", args.vm)) + for k in keys: + v = getattr(args_mut, k) + if "$PREFIX" in v: + setattr(args_mut, k, v.replace("$PREFIX", args.prefix)) + return args_mut + + + class Processes: + def __init__(self, prefix, vm, check=True, **defaults): + self.prefix = prefix + self.vm = vm + self.check = check + self.defaults = defaults + + def make_env(self): + return { + **PASSTHRU_ENV, + "PATH": PASSTHRU_PATH, + "PREFIX": self.prefix, + "VM": self.vm, + } + + def exec(self, *args, **kwargs): + kwargs["cwd"] = kwargs.get("cwd", self.prefix) + kwargs["check"] = kwargs.get("check", self.check) + kwargs["env"] = kwargs.get("env", self.make_env()) + return subprocess.run( + [*args], + **self.defaults, + **kwargs) + + def execline(self, *args, **kwargs): + return exec( + "execlineb", "-c", "\n".join(args), + **self.defaults, + executable=ELB_DIR + "/execlineb", + **{ + "env": self.make_env(), + "check": self.check, + "cwd": self.prefix, + **kwargs, + }, + ) + + def popen(self, *args, **kwargs): + kwargs["pass_fds"] = kwargs.get("pass_fds", ()) + kwargs["env"] = kwargs.get("env", self.make_env()) + kwargs["cwd"] = kwargs.get("cwd", self.prefix) + return subprocess.Popen( + args, + **kwargs, + ) + + @contextmanager + def bwrap( + self, + *bwrap_args, + + die_with_parent=True, + + # Based on the args from + # `host/rootfs/image/usr/bin/run-vmm` + unshare_all=True, + unshare_user=True, + unshare_ipc=None, + unshare_pid=None, + unshare_net=None, + unshare_uts=None, + unshare_cgroup_try=True, + bind=(), + dev_bind=("/dev/kvm", "/dev/vfio"), + dev="/dev", + proc="/proc", + ro_bind=( + "/etc", + "/sys", + "/proc/sys", + "/dev/null", + "/proc/kallsyms", + *CLOSURE), + ro_bind_extra=(), + remount_ro=("/proc/fs", "/proc/irq"), + tmpfs=("/dev/shm", "/tmp", "/var/tmp", "/proc/fs", "/proc/irq"), + tmpfs_extra=(), + + pass_fds=(2,), + **popen_kwargs): + + bwrap_args_sock, remote = socket.socketpair() + remote.set_inheritable(True) + bwrap_args_f = bwrap_args_sock.makefile("w") + with closing(bwrap_args_sock), closing(bwrap_args_f): + def print_arg(*args): + print(*args, file=bwrap_args_f, sep="\0", end="\0") + + if unshare_all: + print_arg("--unshare-all") + if unshare_user: + print_arg("--unshare-user") + if unshare_ipc: + print_arg("--unshare-ipc") + if unshare_pid: + print_arg("--unshare-pid") + if unshare_net: + print_arg("--unshare-net") + if unshare_uts: + print_arg("--unshare-uts") + if unshare_cgroup_try: + print_arg("--unshare-cgroup-try") + if die_with_parent: + print_arg("--die-with-parent") + + for p in bind: + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--bind", p1, p2) + for p in (*ro_bind, *ro_bind_extra): + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--ro-bind", p1, p2) + for p in dev_bind: + p1, p2 = (p, p) if isinstance(p, str) else p + print_arg("--dev-bind", p1, p2) + for p in (*tmpfs, *tmpfs_extra): + print_arg("--tmpfs", p) + # Hunch: order might matter... + for p in remount_ro: + print_arg("--remount-ro", p) + + bwrap_args_f.flush() + + with closing(remote): + proc = self.popen( + "bwrap", "--args", str(remote.fileno()), *bwrap_args, + **popen_kwargs, + executable=BWRAP_PATH, + pass_fds=(*pass_fds, remote.fileno()), + ) + + with proc as p: + try: + yield p + finally: + try: + p.poll() + except: # noqa: E722 + pass + if p.returncode is None: + p.terminate() + p.wait() + + @contextmanager + def run_ch(self): + args = [ + SOCKETBINDER_PATH, + "-B", + self.prefix + "/vmm.sock", + CH_PATH, + "--api-socket", + "fd=0", + ] + p = self.popen( + *args, + shell=False, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + pass_fds=(2,)) + try: + p.wait(0.125) + needs_cleanup = False + except subprocess.TimeoutExpired: + needs_cleanup = True + if not os.path.exists(self.prefix + "/vmm.sock"): + raise RuntimeError(f"{self.prefix}/vmm.sock should exist by now") + if p.returncode is not None: + raise RuntimeError("CH exited early") + try: + yield p + finally: + try: + p.poll() + except: # noqa: E722 + pass + if p.returncode is None: + p.terminate() # CH handles SIG{INT,TERM}? + p.wait() + unlink_paths = [ + self.prefix + "/vmm.sock", + self.prefix + "/vmm.sock.lock", + self.prefix + "/vsock.sock", + ] if needs_cleanup else [] + for p in unlink_paths: + if os.path.exists(p): + os.remove(p) + + @contextmanager + def add_virtiofsd( + self, + root_dir, + tag, + ro=False, + subdirs=None, + extra_flags=("--posix-acl",)): + + assert os.path.exists(root_dir) + + sock_path = self.prefix + f"/virtiofsd-{tag}.sock" + # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + # NOTE: Nope. Virtiofsd actually expects a blocking socket + # s.setblocking(True) + + def rm_sock(): + if os.path.exists(sock_path): + os.remove(sock_path) + + with ExitStack() as cleanup: # noqa: F841 + # s.bind(sock_path.encode("utf8")) + # cleanup.enter_context(closing(s)) + cleanup.enter_context(defer(rm_sock)) + + args = [ + # If using bwrap(): + # "--argv0", "virtiofsd", + # "--uid", "1000", + # "--gid", "1000", + # "--", + "unshare", "-rUm", + "unshare", "--map-user", "1000", "--map-group", "1000", + VIRTIOFSD_PATH, + "--shared-dir", + root_dir, + "--tag", + tag, + + # "--fd", + # str(s.fileno()), + "--socket-path", + sock_path, + + # If relying on bwrap(): + # "--sandbox", + # "none", + ] + if ro: + args.append("--readonly") + kwargs = { + # If bwrap(): + # "bind": [], + # ("ro_bind_extra" if ro else "bind"): + # [*subdirs] + # if subdirs is not None + # else [root_dir], + + # "pass_fds": (2, s.fileno()), + } + proc_ctx = self.popen(*args, **kwargs) + with proc_ctx as p: + try: + try: + p.wait(0.125) + except subprocess.TimeoutExpired: + pass + if p.returncode is not None: + raise RuntimeError("virtiofsd exited too early") + yield p, sock_path + finally: + if p.returncode is None: + p.kill() + p.wait() + if os.path.exists(sock_path): + os.remove(sock_path) + + + @contextmanager + def defer(f): + try: + yield + finally: + f() + + + if __name__ == "__main__": + args, args_next = parser.parse_known_args() + preprocess_args(args) + + os.makedirs(args.prefix, exist_ok=True) + ps = Processes( + prefix=args.prefix, + vm=args.vm, + ) + + ch_remote = [ + "ch-remote", + "--api-socket", + args.prefix + "/vmm.sock", + ] + + with ExitStack() as cleanup: + ch = cleanup.enter_context(ps.run_ch()) + ps.exec(*ch_remote, "create", args.vm_config) + ps.exec( + TAPS_PATH, "pass", + *ch_remote, "add-net", + "id=wan,fd=3,mac=00:00:00:00:00:01") + + send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}" + os.makedirs(send_dir, exist_ok=True) + vfsd, vfsd_path = cleanup.enter_context( + ps.add_virtiofsd( + send_dir, + tag="send", + )) + ps.exec(*ch_remote, "add-fs", f"tag=send,socket={vfsd_path},id=send") + ps.exec(*ch_remote, "boot") + ps.exec(*ch_remote, "info") + try: + ch.wait() + except KeyboardInterrupt: + pass + ''; + in + writeElb "run-${hostName}" '' + ${superviseVm} --vm-config=${chSettingsFile} --vm=${hostName} + ''; } + (lib.mkIf cfg.enable { + boot.initrd.availableKernelModules = [ + "erofs" + "overlay" + "virtio_mmio" + "virtio_pci" + "virtio_blk" + # "9pnet_virtio" + # "9p" + "virtiofs" + ]; + boot.initrd.systemd.enable = lib.mkDefault true; + fileSystems = { + "/nix/store" = { + fsType = "overlay"; + overlay.lowerdir = map (img: "/nix/.ro-stores/${toString img.seq}") layers; + neededForBoot = true; + }; + } + // lib.listToAttrs ( + map ( + img: + lib.nameValuePair "/nix/.ro-stores/${toString img.seq}" { + device = "/dev/disk/by-label/${img.label}"; + neededForBoot = true; + options = [ "x-systemd.device-timeout=5" ]; + } + ) layers + ); + }) ]; } diff --git a/profiles/debug-closure.nix b/profiles/debug-closure.nix index 86137c0..d1772da 100644 --- a/profiles/debug-closure.nix +++ b/profiles/debug-closure.nix @@ -15,9 +15,15 @@ let inherit (ps) writeErofsLayers; emptySystem = import (pkgs.path + "/nixos/lib/eval-config.nix") { modules = [ - ./minimal.nix + (modulesPath + "/profiles/minimal.nix") { system.stateVersion = config.system.stateVersion; + fileSystems."/".fsType = "tmpfs"; + boot.loader.grub.enable = false; + networking.hostName = "base"; + networking.nftables.enable = true; + networking.useNetworkd = true; + systemd.network.enable = true; } ]; }; diff --git a/profiles/minimal.nix b/profiles/minimal.nix deleted file mode 100644 index 1ac85c8..0000000 --- a/profiles/minimal.nix +++ /dev/null @@ -1,36 +0,0 @@ -{ - lib, - config, - modulesPath, - ... -}: -{ - imports = [ - (modulesPath + "/profiles/minimal.nix") - ]; - boot.loader.grub.enable = false; - boot.initrd.systemd.enable = true; - networking.useNetworkd = true; - networking.nftables.enable = config.networking.firewall.enable || config.networking.nat.enable; - fileSystems."/".fsType = lib.mkDefault "tmpfs"; - networking.hostName = lib.mkDefault "base"; - - systemd.sysusers.enable = false; - services.userborn.enable = true; # nikstur it - - nix.enable = false; - services.logrotate.enable = false; - services.udisks2.enable = false; - system.tools.nixos-generate-config.enable = false; - systemd.coredump.enable = false; - powerManagement.enable = false; - boot.kexec.enable = false; - system.switch.enable = false; - services.resolved.enable = false; - - systemd.services.generate-shutdown-ramfs.enable = lib.mkForce false; - systemd.services.systemd-remount-fs.enable = lib.mkForce false; - systemd.services.systemd-pstore.enable = lib.mkForce false; - systemd.services.lastlog2-import.enable = lib.mkForce false; - # systemd.services.suid-sgid-wrappers.enable = lib.mkForce false; -} diff --git a/profiles/on-failure.nix b/profiles/on-failure.nix deleted file mode 100644 index c5c256d..0000000 --- a/profiles/on-failure.nix +++ /dev/null @@ -1,72 +0,0 @@ -{ - lib, - config, - pkgs, - ... -}: -let - cfg = config.some.failure-handler; - jobScript = pkgs.writeShellScriptBin "show-status" '' - set -euo pipefail - - export PATH=${lib.getBin config.boot.initrd.systemd.package}/bin''${PATH:+:}$PATH - export PATH=${lib.getBin pkgs.util-linux}/bin''${PATH:+:}$PATH - export PATH=${lib.getBin pkgs.gnugrep}/bin''${PATH:+:}$PATH - - unit="$1" - shift - - systemctl status "$unit" >&2 || true - patterns=$unit$'\n'error - dmesg | grep -Fi "$patterns" || true - ''; - mkSystemdDropin = pkgs.callPackage ../pkgs/mkSystemdDropin.nix { }; -in -{ - options.some.failure-handler = { - enable = lib.mkEnableOption "Set up show-status@.service as a default OnFailure dependency"; - stage-1.enable = - lib.mkEnableOption "Set up show-status@.service as a default OnFailure dependency in initramfs/initrd" - // { - default = cfg.enable; - }; - package = lib.mkOption { - type = lib.types.package; - readOnly = true; - description = "The internal package with the drop-ins"; - }; - }; - config = { - some.failure-handler.package = mkSystemdDropin { - name = "status-on-failure"; - inherit jobScript; - dropinText = '' - [Unit] - OnFailure=status@%n.service - ''; - serviceText = '' - [Unit] - DefaultDependencies=no - Description=Show status for %i - - [Service] - Type=oneshot - StandardOutput=journal+console - StandardError=journal+console - ExecStart=${lib.getExe jobScript} "%i" - JoinsNamespaceOf= - DelegateNamespaces= - ''; - extraCommands = '' - printf "%s" "$serviceText" > "$root/status@.service" - ''; - }; - boot.initrd.systemd.packages = lib.optionals cfg.stage-1.enable [ cfg.package ]; - boot.initrd.systemd.storePaths = lib.optionals cfg.stage-1.enable [ - jobScript - pkgs.util-linux - pkgs.gnugrep - ]; - systemd.packages = lib.optionals cfg.enable [ cfg.package ]; - }; -} diff --git a/profiles/vmapp-demo.nix b/profiles/vmapp-demo.nix index 3af62bc..2f960b2 100644 --- a/profiles/vmapp-demo.nix +++ b/profiles/vmapp-demo.nix @@ -212,9 +212,9 @@ in ''} %i"; }; - boot.initrd.systemd.settings.Manager.DefaultTimeoutStartSec = lib.mkDefault 30; - systemd.settings.Manager.DefaultTimeoutStopSec = lib.mkDefault 10; - systemd.services."user@".serviceConfig.TimeoutStopSec = lib.mkDefault 10; + boot.initrd.systemd.settings.Manager.DefaultTimeoutStartSec = 30; + systemd.settings.Manager.DefaultTimeoutStopSec = 10; + systemd.services."user@".serviceConfig.TimeoutStopSec = 10; services.openssh.enable = true; diff --git a/shell.nix b/shell.nix index 83b3391..f8bb9a7 100644 --- a/shell.nix +++ b/shell.nix @@ -1,31 +1,13 @@ with import { }; -let - uvmPkgs = callPackage ./pkgs { }; -in mkShell.override { stdenv = stdenvNoCC; } { - inputsFrom = with uvmPkgs; [ - ch-proxy - taps - writeErofsLayers - request-usb + packages = map lib.getBin [ + cloud-hypervisor + virtiofsd + crosvm # virtio-gpu + npins + ] ++ [ + man-pages + linux-manual ]; - packages = - map lib.getBin [ - uvmPkgs.cloud-hypervisor-gpu - virtiofsd - crosvm # virtio-gpu - npins - strace - bubblewrap - python3 - execline - s6 - wayland-proxy-virtwl - uvmPkgs.taps - ] - ++ [ - man-pages - linux-manual - ]; }