{
  config,
  lib,
  pkgs,
  ...
}:

# NixOS module that describes how to run this system as a cloud-hypervisor
# guest: it declares the `uvms.cloud-hypervisor.*` options, renders the VM
# configuration to JSON, and builds a runner script plus a NixOS test that
# can be used to debug the host kernel with gdb.
#
# It is not the intent to stick to the microvm.nix-like static interface,
# but we shall begin by reproducing at least some of their work.

let
  cfg = config.uvms.cloud-hypervisor;

  inherit (config.networking) hostName;
  inherit (config.debug.closure.erofs) layers;
  inherit (lib)
    mkOption
    types
    concatMapStringsSep
    getExe
    getExe'
    getBin
    ;

  # cloud-hypervisor built unstripped with a debug build type.
  package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: {
    patches = oldAttrs.patches or [ ] ++ [
      # ../patches/ch.patch
    ];
    buildType = "debug";
    dontStrip = true;
  });
  uvmsPkgs = pkgs.callPackage ../pkgs { };

  # The VM description handed to `ch-remote create`.
  chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings;

  elbPrefix = "${lib.getBin pkgs.execline}/bin";
  s6Prefix = "${lib.getBin pkgs.s6}/bin";

  # Write an execline script to $out/bin/<name>, run with `execlineb -W`.
  writeElb = name: text: writeElb' name "-W" text;

  # Same as writeElb, but with an explicit execlineb argument string.
  # `elArgs` may be null, in which case the shebang carries no argument.
  writeElb' =
    name: elArgs: text:
    pkgs.writeTextFile {
      inherit name;
      destination = "/bin/${name}";
      executable = true;
      # Lets lib.getExe resolve the script without guessing from the
      # derivation name.
      meta.mainProgram = name;
      # The whole " <args>" suffix sits inside the null guard: interpolating
      # a null elArgs directly would abort evaluation.
      text = ''
        #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " ${elArgs}"}
        importas OLDPATH PATH
        export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}"
        ${text}
      '';
    };
in
{
  options = {
    uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)";
    uvms.cloud-hypervisor.runner = mkOption {
      type = types.package;
      description = "A naive script for running this system in cloud-hypervisor";
    };
    uvms.cloud-hypervisor.debugger = mkOption {
      type = types.lazyAttrsOf types.anything;
      description = "Same but you can debug the kernel";
    };
    # Read-only handle on the generated vm.json.
    uvms.cloud-hypervisor.settingsFile = mkOption {
      type = types.package;
      default = chSettingsFile;
      defaultText = "...";
      readOnly = true;
    };
    # Free-form JSON settings; only the keys this module relies on are typed.
    uvms.cloud-hypervisor.settings = mkOption {
      default = { };
      type = types.submodule {
        freeformType = (pkgs.formats.json { }).type;
        options = {
          payload = {
            cmdline = mkOption { type = types.str; };
            kernel = mkOption { type = types.str; };
            initramfs = mkOption {
              type = types.str;
              default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}";
            };
          };
          vsock = {
            cid = mkOption {
              type = types.int;
              default = 4;
            };
            socket = mkOption {
              type = types.str;
              default = "vsock.sock";
            };
          };
          "api-socket" = mkOption {
            type = types.str;
            default = "vmm.sock";
          };
          "serial".mode = mkOption {
            type = types.str;
            default = "File";
          };
          "serial".file = mkOption {
            type = types.nullOr types.str;
            default = "serial";
          };
          "console".mode = mkOption {
            type = types.str;
            default = "Pty";
          };
          "console".file = mkOption {
            type = types.nullOr types.str;
            default = null;
          };
          # "watchdog" = true;
          # "seccomp" = true;
        };
      };
    };
    # Extra kernel parameters appended after the defaults below.
    uvms.cloud-hypervisor.extraCmdline = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
    };
    # Full kernel command line, joined with spaces into settings.payload.cmdline.
    uvms.cloud-hypervisor.cmdline = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [
        "earlyprintk=ttyS0"
        "console=ttyS0"
        "reboot=t"
        "panic=-1"
        "init=${config.system.build.toplevel}/init"
      ]
      ++ config.boot.kernelParams
      ++ config.uvms.cloud-hypervisor.extraCmdline;
    };
  };

  config = lib.mkMerge [
    {
      uvms.cloud-hypervisor.settings = {
        payload = {
          cmdline = lib.concatStringsSep " " cfg.cmdline;
          kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}";
        };
        # One read-only disk per erofs layer, identified by the layer label.
        disks = map (img: {
          path = img;
          readonly = true;
          id = toString img.label;
        }) layers;
        memory = {
          size = 1536 * 1048576;
          shared = true;
          mergeable = true;
          # hotplugged_size = 512 * 1048576;
          # hotplugd_size = 1536 * 1048576;
          # hotplug_method = "virtio-mem"
        };
        cpus = {
          boot_vcpus = 4;
          max_vcpus = 4;
        };
      };

      # A NixOS test whose machine runs a debug kernel under QEMU's gdb stub
      # ("-s") and starts the runner inside it. Note that `config` is
      # shadowed inside the test and again inside the node; `cfg`, `hostName`
      # and `package` still refer to the outer system.
      uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest (
        { config, ... }:
        {
          name = "test-run-${hostName}";
          passthru = rec {
            inherit (config.nodes.machine.system.build) gdbScript;
            inherit (config.nodes.machine.boot.kernelPackages) kernel;
            kernelSrc = pkgs.srcOnly kernel;
          };
          nodes.machine =
            { config, ... }:
            let
              kernel = config.boot.kernelPackages.kernel;
              kernelSrc = pkgs.srcOnly kernel;
              # Prepares /tmp/gdb (kernel sources symlinked in, tun/tap
              # modules decompressed under build/) and attaches gdb to the
              # remote stub on :1234 with the kernel's gdb scripts loaded.
              gdbScript = writeElb "attach-gdb" ''
                if { rm -rf /tmp/gdb }
                if { mkdir -p /tmp/gdb/kos }
                cd /tmp/gdb
                if {
                  elglob -0 files ${kernelSrc}/*
                  forx -E f { $files }
                  ln -s $f ./
                }
                if { mkdir -p build }
                cd build
                if {
                  forx -E pattern {
                    ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun*
                    ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap*
                  }
                  elglob -0 files $pattern
                  forx -E f { $files }
                  if { cp $f . }
                  backtick -E COMPRESSED { basename $f }
                  xz -d $COMPRESSED
                }
                elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb
                if {
                  if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts }
                  mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py
                }
                ${getExe pkgs.gdb}
                  -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")"
                  -ex "target remote :1234"
                  -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py"
                  -ex "lx-symbols"
                  ${kernel.dev}/vmlinux
              '';
            in
            {
              boot.kernelPackages = pkgs.linuxPackagesFor (
                (pkgs.linux.override (oldArgs: {
                  # extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [
                  #   "scripts_gdb"
                  # ];
                  kernelPatches = oldArgs.kernelPatches or [ ] ++ [
                    {
                      name = "debug";
                      patch = null;
                      structuredExtraConfig = {
                        GDB_SCRIPTS = lib.kernel.yes;
                        DEBUG_INFO = lib.kernel.yes;
                        DEBUG_INFO_REDUCED = lib.kernel.no;
                        # FRAME_POINTER = lib.kernel.yes; # "unused option"???
                        KALLSYMS = lib.kernel.yes;
                        KGDB = lib.kernel.yes;
                      };
                    }
                  ];
                })).overrideAttrs
                  (oldAttrs: {
                    dontStrip = true;
                    # Best effort: ship the generated constants.py next to
                    # the other gdb scripts, or just say that it is missing.
                    postInstall = oldAttrs.postInstall or "" + ''
                      cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist"
                    '';
                  })
              );

              boot.kernelParams = [ "nokaslr" ];
              networking.useNetworkd = true;
              virtualisation.qemu.options = [ "-s" ];
              environment.systemPackages = [
                pkgs.gdb
                package # CH
                cfg.runner
                uvmsPkgs.taps
              ];
              system.build.gdbScript = gdbScript;
              systemd.services.taps = {
                wantedBy = [ "multi-user.target" ];
                environment.TAPS_SOCK = "/run/taps/taps.sock";
                serviceConfig = {
                  UMask = "0007";
                  ExecStart = "${getExe uvmsPkgs.taps} serve";
                  RuntimeDirectory = "taps";
                  DynamicUser = true;
                  AmbientCapabilities = [
                    "CAP_NET_BIND_SERVICE"
                    "CAP_NET_ADMIN"
                  ];
                  NoNewPrivileges = true;
                };
              };
            };
          testScript = ''
            machine.succeed("${getExe cfg.runner}")
          '';
        }
      );

      # NOTE: Used to be an even uglier bash script, but, for now, execline
      # makes for easier comparisons against spectrum
      uvms.cloud-hypervisor.runner =
        let
          superviseVm = getExe superviseVm';
          # Starts cloud-hypervisor behind s6-ipcserver-socketbinder, then
          # drives it through ch-remote: create, add-net (via taps), boot.
          superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } ''
            import os
            import subprocess
            from argparse import ArgumentParser
            from contextlib import contextmanager, ExitStack


            parser = ArgumentParser("supervise-vm")
            # Both are needed further down; fail in argparse with a usable
            # message instead of a TypeError deep inside the script.
            parser.add_argument("--vm", required=True)
            parser.add_argument("--prefix", default="$HOME/uvms/$VM")
            parser.add_argument("--sock", default="$PREFIX/supervisor.sock")
            parser.add_argument("--vm-config", required=True)

            MSG_SIZE = 16
            ELB_DIR = "${lib.getBin pkgs.execline}/bin"  # noqa: E501
            S6_DIR = "${lib.getBin pkgs.s6}/bin"  # noqa: E501
            CH_DIR = "${lib.getBin package}/bin"  # noqa: E501
            SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder"  # noqa: E501
            CH_PATH = CH_DIR + "/cloud-hypervisor"
            CHR_PATH = CH_DIR + "/ch-remote"
            TAPS_PATH = "${lib.getExe uvmsPkgs.taps}"  # noqa: E501

            PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR])
            # Environment handed to every execline invocation: a small
            # allow-list from the caller plus a fixed HOME and PATH.
            PASSTHRU_ENV = {
                **{
                    k: v
                    for k, v in os.environ.items()
                    if k.startswith(("RUST", "WAYLAND")) or k in ["TAPS_SOCK"]
                },
                "HOME": os.environ.get("HOME", os.getcwd()),
                "PATH": PASSTHRU_PATH,
            }


            def configure_execline(prefix, vm, check=True, **defaults):
                """Return a helper that runs its arguments as an execline script.

                The script runs with cwd=prefix and with PREFIX and VM exported.
                Arguments are joined with newlines, so each one must be a single
                execline word.
                """
                def execline(*args, check=check, **kwargs):
                    return subprocess.run(
                        ["execlineb", "-c", "\n".join(args)],
                        **defaults,
                        executable=ELB_DIR + "/execlineb",
                        env={
                            **PASSTHRU_ENV,
                            "PATH": PASSTHRU_PATH,
                            "PREFIX": prefix,
                            "VM": vm,
                        },
                        check=check,
                        cwd=prefix,
                        **kwargs)

                return execline


            def preprocess_args(args_mut):
                """Expand $HOME, $VM and $PREFIX in every string argument.

                Mutates and returns args_mut. The placeholders are expanded in
                that order, so $PREFIX sees an already expanded prefix.
                """
                keys = [k for k, v in args_mut._get_kwargs() if isinstance(v, str)]
                # Resolved lazily: later placeholders must observe the values
                # written by the earlier passes. Reads args_mut, never a global.
                substitutions = [
                    ("$HOME", lambda: PASSTHRU_ENV["HOME"]),
                    ("$VM", lambda: args_mut.vm),
                    ("$PREFIX", lambda: args_mut.prefix),
                ]
                for placeholder, resolve in substitutions:
                    for k in keys:
                        v = getattr(args_mut, k)
                        if placeholder in v:
                            setattr(args_mut, k, v.replace(placeholder, resolve()))
                return args_mut


            @contextmanager
            def defer(f):
                try:
                    yield
                finally:
                    f()


            @contextmanager
            def run_ch(vm_prefix):
                """Run cloud-hypervisor with its API socket at vm_prefix/vmm.sock.

                Yields the Popen handle. On exit the process is terminated if it
                is still running, and the socket files are removed if this call
                owned them.
                """
                sock = vm_prefix + "/vmm.sock"
                args = [
                    SOCKETBINDER_PATH,
                    "-B",
                    sock,
                    CH_PATH,
                    "--api-socket",
                    "fd=0",
                ]
                # NOTE(review): CH inherits our cwd, so relative paths in the VM
                # config resolve there, while the cleanup below looks under
                # vm_prefix. Confirm that callers start us from vm_prefix.
                proc = subprocess.Popen(args, shell=False)
                # Only remove socket files if our process survived start-up;
                # after an early exit they may belong to another instance.
                needs_cleanup = False
                try:
                    try:
                        proc.wait(1.0)
                    except subprocess.TimeoutExpired:
                        needs_cleanup = True
                    if proc.returncode is not None:
                        raise RuntimeError("CH exited early")
                    if not os.path.exists(sock):
                        raise RuntimeError(f"{sock} should exist by now")
                    yield proc
                finally:
                    # Covers start-up failures too, so a live CH never leaks.
                    try:
                        proc.poll()
                    except Exception:
                        pass
                    if proc.returncode is None:
                        proc.terminate()  # CH handles SIG{INT,TERM}?
                        proc.wait()
                    unlink_paths = [
                        sock,
                        sock + ".lock",
                        vm_prefix + "/vsock.sock",
                    ] if needs_cleanup else []
                    for path in unlink_paths:
                        if os.path.exists(path):
                            os.remove(path)


            if __name__ == "__main__":
                args, args_next = parser.parse_known_args()
                preprocess_args(args)

                os.makedirs(args.prefix, exist_ok=True)
                execline = configure_execline(prefix=args.prefix, vm=args.vm)
                ch_remote = [
                    "ch-remote",
                    "--api-socket",
                    args.prefix + "/vmm.sock",
                ]

                with ExitStack() as cleanup:
                    ch = cleanup.enter_context(run_ch(args.prefix))
                    execline(*ch_remote, "create", args.vm_config)
                    execline(
                        TAPS_PATH, "pass",
                        *ch_remote, "add-net",
                        "id=wan,fd=3,mac=00:00:00:00:00:01")
                    execline(*ch_remote, "boot")
                    execline(*ch_remote, "info")
                    ch.wait()
          '';
        in
        writeElb "run-${hostName}" ''
          ${superviseVm}
            --vm-config=${chSettingsFile}
            --vm=${hostName}
        '';
    }

    # Guest-side configuration: mount every erofs layer read-only and stack
    # them into an overlay that becomes /nix/store.
    (lib.mkIf cfg.enable {
      boot.initrd.availableKernelModules = [
        "erofs"
        "overlay"
        "virtio_mmio"
        "virtio_pci"
        "virtio_blk"
        # "9pnet_virtio"
        # "9p"
        "virtiofs"
      ];
      boot.initrd.systemd.enable = lib.mkDefault true;
      fileSystems = {
        "/nix/store" = {
          fsType = "overlay";
          overlay.lowerdir = map (img: "/nix/.ro-stores/${toString img.seq}") layers;
          neededForBoot = true;
        };
      }
      // lib.listToAttrs (
        map (
          img:
          lib.nameValuePair "/nix/.ro-stores/${toString img.seq}" {
            device = "/dev/disk/by-label/${img.label}";
            neededForBoot = true;
            options = [ "x-systemd.device-timeout=5" ];
          }
        ) layers
      );
    })
  ];
}