{
  config,
  lib,
  pkgs,
  ...
}:

# NixOS module that describes how to boot this system configuration as a
# cloud-hypervisor guest.  It exposes the cloud-hypervisor JSON settings,
# a runner script (execline), and a NixOS test that runs the runner inside
# a QEMU machine with a gdb-friendly kernel.
#
# It is not the intent to stick to the microvm.nix-like static interface,
# but we shall begin by reproducing at least some of their work.

let
  cfg = config.uvms.cloud-hypervisor;

  inherit (config.networking) hostName;
  inherit (config.debug.closure.erofs) layers;
  inherit (lib)
    mkOption
    types
    getExe
    getExe'
    ;

  # cloud-hypervisor built as an unstripped debug build.
  package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: {
    patches = oldAttrs.patches or [ ] ++ [
      # ../patches/ch.patch
    ];
    buildType = "debug";
    dontStrip = true;
  });
  uvmsPkgs = pkgs.callPackage ../pkgs { };

  chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings;

  # NOTE: `\${HOME}` is deliberately left unexpanded by Nix; the generated
  # execline scripts substitute it at run time via `importas -i HOME HOME`.
  uvmPrefix = "\${HOME}/uvms/${hostName}";
  vmmSock = "${uvmPrefix}/vmm.sock";
  elbPrefix = "${lib.getBin pkgs.execline}/bin";
  s6Prefix = "${lib.getBin pkgs.s6}/bin";

  # writeElb name text: package `text` as the execline script /bin/<name>,
  # interpreted by `execlineb -W`.
  writeElb = name: text: writeElb' name "-W" text;

  # writeElb' name elArgs text: same as writeElb, but with an explicit
  # interpreter argument.  `elArgs` may be null, in which case the shebang
  # carries no argument at all.  The script body is prefixed with a PATH
  # export that puts the execline and s6 binaries first.
  writeElb' =
    name: elArgs: text:
    pkgs.writeTextFile {
      inherit name;
      destination = "/bin/${name}";
      executable = true;
      # The interpreter argument is interpolated inside the optionalString:
      # interpolating a null `elArgs` directly would abort evaluation.
      text = ''
        #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " ${elArgs}"}
        importas OLDPATH PATH
        export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}"
        ${text}
      '';
    };
in
{
  options = {
    uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)";
    uvms.cloud-hypervisor.runner = mkOption {
      type = types.package;
      description = "A naive script for running this system in cloud-hypervisor";
    };
    uvms.cloud-hypervisor.debugger = mkOption {
      type = types.lazyAttrsOf types.anything;
      description = "Same but you can debug the kernel";
    };
    # Read-only handle on the JSON file generated from `settings`.
    uvms.cloud-hypervisor.settingsFile = mkOption {
      type = types.package;
      default = chSettingsFile;
      defaultText = "...";
      readOnly = true;
    };
    # Free-form JSON passed to `ch-remote create`; only the keys this module
    # relies on are declared explicitly.
    uvms.cloud-hypervisor.settings = mkOption {
      default = { };
      type = types.submodule {
        freeformType = (pkgs.formats.json { }).type;
        options = {
          payload = {
            cmdline = mkOption { type = types.str; };
            kernel = mkOption { type = types.str; };
            initramfs = mkOption {
              type = types.str;
              default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}";
            };
          };
          vsock = {
            cid = mkOption {
              type = types.int;
              default = 4;
            };
            socket = mkOption {
              type = types.str;
              default = "vsock.sock";
            };
          };
          "api-socket" = mkOption {
            type = types.str;
            default = "vmm.sock";
          };
          "serial".mode = mkOption {
            type = types.str;
            default = "File";
          };
          "serial".file = mkOption {
            type = types.nullOr types.str;
            default = "serial";
          };
          "console".mode = mkOption {
            type = types.str;
            default = "Pty";
          };
          "console".file = mkOption {
            type = types.nullOr types.str;
            default = null;
          };
          # "watchdog" = true;
          # "seccomp" = true;
        };
      };
    };
    # Extra kernel parameters appended to the default `cmdline`.
    uvms.cloud-hypervisor.extraCmdline = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
    };
    # Full guest kernel command line as a list of words; joined with spaces
    # into `settings.payload.cmdline`.
    uvms.cloud-hypervisor.cmdline = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [
        "earlyprintk=ttyS0"
        "console=ttyS0"
        "reboot=t"
        "panic=-1"
        "init=${config.system.build.toplevel}/init"
      ]
      ++ config.boot.kernelParams
      ++ config.uvms.cloud-hypervisor.extraCmdline;
    };
  };

  config = lib.mkMerge [
    {
      uvms.cloud-hypervisor.settings = {
        payload = {
          cmdline = lib.concatStringsSep " " cfg.cmdline;
          kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}";
        };
        # One read-only disk per store layer, identified by the layer label.
        disks = map (img: {
          path = img;
          readonly = true;
          id = toString img.label;
        }) layers;
        memory = {
          size = 1536 * 1048576;
          shared = true;
          mergeable = true;
          # hotplugged_size = 512 * 1048576;
          # hotplugd_size = 1536 * 1048576;
          # hotplug_method = "virtio-mem"
        };
        cpus = {
          boot_vcpus = 4;
          max_vcpus = 4;
        };
      };

      # NixOS test whose machine runs `cfg.runner`.  The machine uses a
      # kernel with debug info and gdb scripts, QEMU is started with `-s`,
      # and `passthru.gdbScript` attaches gdb to `:1234`.
      uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest (
        { config, ... }:
        {
          name = "test-run-${hostName}";
          passthru = rec {
            inherit (config.nodes.machine.system.build) gdbScript;
            inherit (config.nodes.machine.boot.kernelPackages) kernel;
            kernelSrc = pkgs.srcOnly kernel;
          };
          nodes.machine =
            { config, ... }:
            let
              kernel = config.boot.kernelPackages.kernel;
              kernelSrc = pkgs.srcOnly kernel;
              # Prepares /tmp/gdb (kernel sources symlinked, tun/tap modules
              # decompressed into ./build, gdb helper scripts copied) and
              # then execs gdb against the kernel's vmlinux.
              gdbScript = writeElb "attach-gdb" ''
                if { rm -rf /tmp/gdb }
                if { mkdir -p /tmp/gdb/kos }
                cd /tmp/gdb
                if {
                  elglob -0 files ${kernelSrc}/*
                  forx -E f { $files }
                  ln -s $f ./
                }
                if { mkdir -p build }
                cd build
                if {
                  forx -E pattern {
                    ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun*
                    ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap*
                  }
                  elglob -0 files $pattern
                  forx -E f { $files }
                  if { cp $f . }
                  backtick -E COMPRESSED { basename $f }
                  xz -d $COMPRESSED
                }
                elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb
                if {
                  if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts }
                  mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py
                }
                ${getExe pkgs.gdb}
                  -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")"
                  -ex "target remote :1234"
                  -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py"
                  -ex "lx-symbols"
                  ${kernel.dev}/vmlinux
              '';
            in
            {
              boot.kernelPackages = pkgs.linuxPackagesFor (
                (pkgs.linux.override (oldArgs: {
                  # extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [
                  #   "scripts_gdb"
                  # ];
                  kernelPatches = oldArgs.kernelPatches or [ ] ++ [
                    {
                      name = "debug";
                      patch = null;
                      structuredExtraConfig = {
                        GDB_SCRIPTS = lib.kernel.yes;
                        DEBUG_INFO = lib.kernel.yes;
                        DEBUG_INFO_REDUCED = lib.kernel.no;
                        # FRAME_POINTER = lib.kernel.yes; # "unused option"???
                        KALLSYMS = lib.kernel.yes;
                        KGDB = lib.kernel.yes;
                      };
                    }
                  ];
                })).overrideAttrs
                  (oldAttrs: {
                    dontStrip = true;
                    # Best effort: a missing constants.py is reported but
                    # does not fail the build.
                    postInstall = oldAttrs.postInstall or "" + ''
                      cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist"
                    '';
                  })
              );

              # Keep kernel addresses stable for the debugger.
              boot.kernelParams = [ "nokaslr" ];
              networking.useNetworkd = true;
              virtualisation.qemu.options = [ "-s" ];
              environment.systemPackages = [
                pkgs.gdb
                package # CH
                cfg.runner
                uvmsPkgs.taps
              ];
              system.build.gdbScript = gdbScript;
              systemd.services.taps = {
                wantedBy = [ "multi-user.target" ];
                environment.TAPS_SOCK = "/run/taps/taps.sock";
                serviceConfig = {
                  UMask = "0007";
                  ExecStart = "${getExe uvmsPkgs.taps} serve";
                  RuntimeDirectory = "taps";
                  DynamicUser = true;
                  AmbientCapabilities = [
                    "CAP_NET_BIND_SERVICE"
                    "CAP_NET_ADMIN"
                  ];
                  NoNewPrivileges = true;
                };
              };
            };
          testScript = ''
            machine.succeed("${getExe cfg.runner}")
          '';
        }
      );

      # NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum
      uvms.cloud-hypervisor.runner =
        let
          addProcess = getExe addProcess';

          # add-process EVENTS_PATH [--then CMD]... [COMMAND ARGS...]
          #
          # Runs COMMAND as a member of a process group coordinated over the
          # unix socket EVENTS_PATH.  The first instance to bind the socket
          # also acts as the server.  When any member's COMMAND exits, a
          # "killall" message makes the server tell every member to "die",
          # and each member terminates its COMMAND.  The --then commands run
          # once this instance is done, unless COMMAND exited within the
          # first half second.
          #
          # NOTE: writePython3Bin lints the script with flake8 at build
          # time, so the layout below (blank lines, line length) matters.
          addProcess' = pkgs.writers.writePython3Bin "add-process" { } ''
            import errno
            import os
            import select
            import socket
            import subprocess
            import sys
            from argparse import ArgumentParser
            from contextlib import contextmanager, ExitStack
            from threading import Thread, Semaphore

            parser = ArgumentParser()
            parser.add_argument("events_path")
            parser.add_argument("--then", action="append")

            # Every message is padded to exactly MSG_SIZE bytes on the wire.
            MSG_SIZE = 16
            # State shared between the main thread and the server thread.
            SHMEM = {}
            # Guards send(); created before any thread that uses it starts.
            s_lock = Semaphore()


            def send(sock, msg):
                """Send msg, space-padded to MSG_SIZE bytes."""
                assert len(msg) <= MSG_SIZE, len(msg)
                return sock.send(msg.ljust(MSG_SIZE))


            def recv(sock):
                """Return the next message word, or b"" once the peer is gone."""
                msg = sock.recv(MSG_SIZE)
                assert len(msg) <= MSG_SIZE, len(msg)
                return (msg.split() + [b""])[0]


            def serve_impl(events_path, listener):
                """Accept members until "killall", then tell each one to "die"."""
                SHMEM["server"] = True
                cons = []
                state = "up"
                while state == "up" or cons != []:
                    if state == "up":
                        rs, ws, es = select.select([listener, *cons], [], [])
                    else:
                        rs, ws, es = select.select(cons, cons, [])
                    events = []
                    for r in rs:
                        if r is listener:
                            con, _ = r.accept()
                            cons.append(con)
                            continue
                        event = recv(r)
                        if event == b"":
                            # EOF: the peer went away.  Drop it, otherwise
                            # select() reports it readable forever.
                            r.close()
                            cons.remove(r)
                        else:
                            events.append(event)
                    if any(e == b"killall" for e in events):
                        state = "down"
                    if state == "down":
                        for w in ws:
                            if w not in cons:
                                # Already dropped on EOF in this iteration.
                                continue
                            with s_lock:
                                try:
                                    send(w, b"die")
                                except (BrokenPipeError, ConnectionResetError):
                                    # The peer is gone; keep notifying others.
                                    pass
                            w.close()
                            cons.remove(w)
                    for w in es:
                        w.close()
                        cons.remove(w)


            def is_live(events_path):
                """Report whether something accepts connections on the path."""
                probe = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
                try:
                    probe.connect(events_path)
                except (ConnectionRefusedError, FileNotFoundError):
                    return False
                finally:
                    probe.close()
                return True


            def serve(events_path):
                """Become the server for events_path unless one is running.

                Returns quietly when another live server owns the socket.
                A socket file that nobody listens on is treated as stale and
                replaced.  The socket file is removed on exit only when this
                call created it.
                """
                base_dir = os.path.dirname(events_path)
                if base_dir:
                    os.makedirs(base_dir, exist_ok=True)
                listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
                listener.setblocking(False)
                bound = False
                try:
                    try:
                        listener.bind(events_path)
                    except OSError as e:
                        if e.errno != errno.EADDRINUSE or is_live(events_path):
                            raise
                        # Leftover of a run that did not clean up.
                        os.remove(events_path)
                        listener.bind(events_path)
                    bound = True
                    listener.listen()
                    return serve_impl(events_path, listener)
                except OSError as e:
                    if e.errno != errno.EADDRINUSE:
                        raise
                finally:
                    listener.close()
                    if bound:
                        os.remove(events_path)


            def register(events_path):
                """Connect to the server as a group member."""
                sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
                sock.connect(events_path)
                return sock


            @contextmanager
            def defer(f):
                """Context manager that calls f() on exit."""
                try:
                    yield
                finally:
                    f()


            if __name__ == "__main__":
                args, args_next = parser.parse_known_args()

                with ExitStack() as cleanup:
                    p = None
                    then_cmds = []
                    if args_next:
                        p = subprocess.Popen(
                            args_next,
                            shell=False)
                        try:
                            # A command that exits right away gets no hooks.
                            p.wait(0.5)
                        except subprocess.TimeoutExpired:
                            # ExitStack unwinds in reverse, so reversing here
                            # runs the hooks in command-line order.
                            then_cmds = list(reversed(args.then or []))
                    for f in then_cmds:
                        # Bind the command now; a plain closure would see
                        # only the last value of the loop variable.
                        def run_f(cmd=f):
                            subprocess.run(cmd)
                        cleanup.enter_context(defer(run_f))

                    maybe_server = Thread(
                        target=serve,
                        args=(args.events_path,),
                        daemon=True)
                    maybe_server.start()
                    maybe_server.join(0.5)
                    # Either we are the server and the thread is still
                    # serving, or somebody else is and the thread returned.
                    assert (
                        ("server" in SHMEM) == maybe_server.is_alive()
                    ), (SHMEM, maybe_server)

                    if args_next:
                        s = register(args.events_path)

                        def watch_p(p, s):
                            p.wait()
                            with s_lock:
                                try:
                                    send(s, b"killall")
                                except BrokenPipeError:
                                    pass

                        def watch_s(p, s):
                            while True:
                                msg = recv(s)
                                if msg == b"die":
                                    p.terminate()
                                    break
                                if msg == b"":
                                    # Server is gone; nothing more can arrive.
                                    break

                        s_watcher = Thread(
                            target=watch_s,
                            args=(p, s),
                            daemon=True)
                        s_watcher.start()
                        watch_p(p, s)
                        s_watcher.join()
                        s.close()
                    if SHMEM.get("server", False):
                        maybe_server.join()

                exit_code = 0
                if args_next:
                    exit_code |= p.returncode
                sys.exit(exit_code)
          '';

          ch = getExe package;
          chr = getExe' package "ch-remote";
        in
        # Starts cloud-hypervisor in the background under add-process, bound
        # to the API socket, then drives it through ch-remote: create the VM
        # from the settings file, add a TAP device, boot, print info.
        #
        # execline does not treat newlines as command separators, so the
        # "rm-vmmsock" hook removes both sockets with a single `rm`
        # invocation; two `rm -f` lines would be parsed as one command whose
        # operands include the word `rm`.
        writeElb "run-${hostName}" ''
          importas -i HOME HOME
          importas -SsD "${chr} --api-socket=${vmmSock}" CHR
          importas -SsD "${uvmPrefix}" PREFIX
          define EVENTS ''${PREFIX}/events.sock
          define -s ADD_PROC "${addProcess} ''${EVENTS}"
          cd $PREFIX
          background {
            $ADD_PROC
              --then ${getExe (
                writeElb "rm-vmmsock" ''
                  importas -i HOME HOME
                  rm -f ${vmmSock} ${uvmPrefix}/vsock.sock
                ''
              )}
              ${getExe (
                writeElb "ch" ''
                  importas -Si 1
                  importas -Si 2
                  s6-ipcserver-socketbinder -B $1
                  exec -a "uuvm/''${2} cloud-hypervisor" ${ch} --api-socket fd=0
                ''
              )}
              ${vmmSock}
              ${hostName}
          }
          foreground { sleep 0.1 }
          ifelse -n { test -S ${vmmSock} } { echo "Apparently ${vmmSock} does not exist" }
          foreground { echo "Loading the configuration" }
          if { $CHR create ${chSettingsFile} }
          foreground { echo "Adding TAP" }
          if { ${lib.getExe uvmsPkgs.taps} pass $CHR add-net "id=wan,fd=3,mac=00:00:00:00:00:01" }
          foreground { echo "Booting" }
          if { $CHR boot }
          if { $CHR info }
        '';
    }

    # Guest-side configuration: mount every store layer read-only by label
    # and stack them as the lower directories of an overlay on /nix/store.
    (lib.mkIf cfg.enable {
      boot.initrd.availableKernelModules = [
        "erofs"
        "overlay"
        "virtio_mmio"
        "virtio_pci"
        "virtio_blk"
        # "9pnet_virtio"
        # "9p"
        "virtiofs"
      ];
      boot.initrd.systemd.enable = lib.mkDefault true;
      fileSystems = {
        "/nix/store" = {
          fsType = "overlay";
          overlay.lowerdir = map (img: "/nix/.ro-stores/${toString img.seq}") layers;
          neededForBoot = true;
        };
      }
      // lib.listToAttrs (
        map (
          img:
          lib.nameValuePair "/nix/.ro-stores/${toString img.seq}" {
            device = "/dev/disk/by-label/${img.label}";
            neededForBoot = true;
            options = [ "x-systemd.device-timeout=5" ];
          }
        ) layers
      );
    })
  ];
}