diff --git a/examples/dummy.nix b/examples/dummy.nix index 7d8c813..4689af1 100644 --- a/examples/dummy.nix +++ b/examples/dummy.nix @@ -13,8 +13,6 @@ system.stateVersion = "25.11"; - networking.hostName = "dummy"; - vmapps.enable = true; _module.args.inputs = import ../npins; diff --git a/pkgs/taps/main.c b/pkgs/taps/main.c index 3b276df..05edb64 100644 --- a/pkgs/taps/main.c +++ b/pkgs/taps/main.c @@ -1,6 +1,5 @@ #define _GNU_SOURCE -#include #include /* secure_getenv */ #include #include @@ -16,12 +15,10 @@ #include #include #include -#include #define __UAPI_DEF_IF_IFNAMSIZ 1 #include #include -#include #include "sendfd.h" @@ -111,11 +108,6 @@ int tuntap_alloc(char *dev, short openFlags, short ifrFlags, int *out_fd) { strncpy(dev, ifr.ifr_name, IFNAMSIZ); *out_fd = fd; - - { - int sz = sizeof(struct virtio_net_hdr_v1); - DO_OR_DIE(ioctl(fd, TUNSETVNETHDRSZ, &sz)); - } return 0; } @@ -265,13 +257,6 @@ void cleanup(int signo, siginfo_t *info, void *_context) { errx(EXIT_FAILURE, "Exiting with signal %d", signo); } -/* skarlibs under ISC */ -int uncoe (int fd) -{ - int flags = fcntl(fd, F_GETFD, 0) ; - return flags < 0 ? flags : flags & FD_CLOEXEC ? fcntl(fd, F_SETFD, flags & ~FD_CLOEXEC) : 0 ; -} - int main(int argc, char **argv) { struct sigaction act = { 0 }; act.sa_flags = SA_SIGINFO; @@ -281,7 +266,7 @@ int main(int argc, char **argv) { bool cmdServe = false; bool cmdPass = false; - char *ifname = "vt-%d"; + char *ifname = "vt%d"; char **rest = argv + 1; char **end = argv + argc; @@ -318,7 +303,7 @@ int main(int argc, char **argv) { const char *servePath = secure_getenv("TAPS_SOCK"); if (servePath == NULL) { - servePath = "/run/taps/taps.sock"; + servePath = "taps.sock"; } if (cmdServe) { @@ -332,7 +317,6 @@ int main(int argc, char **argv) { close(fd); fd = 3; } - uncoe(fd); DO_OR_DIE(execvp(nextArgv[0], nextArgv)); } else { error(EINVAL, EINVAL, "subcommand args"); diff --git a/profiles/ch-runner.nix b/profiles/ch-runner.nix index f156705..fbb09a4 100644 --- a/profiles/ch-runner.nix +++ b/profiles/ch-runner.nix @@ -10,112 +10,21 @@ let cfg = config.uvms.cloud-hypervisor; - - inherit (config.networking) hostName; inherit (config.debug.closure.erofs) layers; - inherit (lib) - mkOption - types - concatMapStringsSep - getExe - getExe' - getBin - ; - - package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: { - patches = oldAttrs.patches or [ ] ++ [ - # ../patches/ch.patch - ]; - buildType = "debug"; - dontStrip = true; - }); - uvmsPkgs = pkgs.callPackage ../pkgs { }; - - chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings; - - uvmPrefix = "\${HOME}/uvms/${hostName}"; - vmmSock = "${uvmPrefix}/vmm.sock"; - elbPrefix = "${lib.getBin pkgs.execline}/bin"; - s6Prefix = "${lib.getBin pkgs.s6}/bin"; - writeElb = name: text: writeElb' name "-W" text; - writeElb' = - name: elArgs: text: - pkgs.writeTextFile { - inherit name; - destination = "/bin/${name}"; - executable = true; - text = '' - #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " "}${elArgs} - importas OLDPATH PATH - export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}" - ${text} - ''; - }; in { options = { uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)"; - uvms.cloud-hypervisor.runner = mkOption { - type = types.package; + uvms.cloud-hypervisor.runner = lib.mkOption { + type = lib.types.package; description = "A naive script for running this system in cloud-hypervisor"; }; - uvms.cloud-hypervisor.debugger = mkOption { - type = types.lazyAttrsOf types.anything; - description = "Same but you can debug the kernel"; + uvms.cloud-hypervisor.extraArgv = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ ]; }; - uvms.cloud-hypervisor.settingsFile = mkOption { - type = types.package; - default = chSettingsFile; - defaultText = "..."; - readOnly = true; - }; - uvms.cloud-hypervisor.settings = mkOption { - default = { }; - type = types.submodule { - freeformType = (pkgs.formats.json { }).type; - options = { - payload = { - cmdline = mkOption { type = types.str; }; - kernel = mkOption { type = types.str; }; - initramfs = mkOption { - type = types.str; - default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}"; - }; - }; - vsock = { - cid = mkOption { - type = types.int; - default = 4; - }; - socket = mkOption { - type = types.str; - default = "vsock.sock"; - }; - }; - "api-socket" = mkOption { - type = types.str; - default = "vmm.sock"; - }; - "serial".mode = mkOption { - type = types.str; - default = "File"; - }; - "serial".file = mkOption { - type = types.nullOr types.str; - default = "serial"; - }; - "console".mode = mkOption { - type = types.str; - default = "Pty"; - }; - "console".file = mkOption { - type = types.nullOr types.str; - default = null; - }; - # "watchdog" = true; - # "seccomp" = true; - }; - }; + uvms.cloud-hypervisor.argv = lib.mkOption { + type = lib.types.listOf lib.types.str; }; uvms.cloud-hypervisor.extraCmdline = lib.mkOption { type = lib.types.listOf lib.types.str; @@ -136,539 +45,34 @@ in }; config = lib.mkMerge [ { - uvms.cloud-hypervisor.settings = { - payload = { - cmdline = lib.concatStringsSep " " cfg.cmdline; - kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}"; - }; - disks = map (img: { - path = img; - readonly = true; - id = toString img.label; - }) layers; - memory = { - size = 1536 * 1048576; - shared = true; - mergeable = true; - # hotplugged_size = 512 * 1048576; - # hotplugd_size = 1536 * 1048576; - # hotplug_method = "virtio-mem" - }; - cpus = { - boot_vcpus = 4; - max_vcpus = 4; - }; - }; - - uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest ( - { config, ... }: - { - name = "test-run-${hostName}"; - passthru = rec { - inherit (config.nodes.machine.system.build) gdbScript; - inherit (config.nodes.machine.boot.kernelPackages) kernel; - kernelSrc = pkgs.srcOnly kernel; - }; - nodes.machine = - { config, ... }: - let - kernel = config.boot.kernelPackages.kernel; - kernelSrc = pkgs.srcOnly kernel; - gdbScript = writeElb "attach-gdb" '' - if { rm -rf /tmp/gdb } - if { mkdir -p /tmp/gdb/kos } - cd /tmp/gdb - if { - elglob -0 files ${kernelSrc}/* - forx -E f { $files } - ln -s $f ./ - } - if { mkdir -p build } - cd build - if { - forx -E pattern { - ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun* - ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap* - } - elglob -0 files $pattern - forx -E f { $files } - if { cp $f . } - backtick -E COMPRESSED { basename $f } - xz -d $COMPRESSED - } - elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb - if { - if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts } - mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py - } - ${getExe pkgs.gdb} - -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")" - -ex "target remote :1234" - -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py" - -ex "lx-symbols" - ${kernel.dev}/vmlinux - ''; - in - { - boot.kernelPackages = pkgs.linuxPackagesFor ( - (pkgs.linux.override (oldArgs: { - # extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [ - # "scripts_gdb" - # ]; - kernelPatches = oldArgs.kernelPatches or [ ] ++ [ - { - name = "debug"; - patch = null; - structuredExtraConfig = { - GDB_SCRIPTS = lib.kernel.yes; - DEBUG_INFO = lib.kernel.yes; - DEBUG_INFO_REDUCED = lib.kernel.no; - # FRAME_POINTER = lib.kernel.yes; # "unused option"??? - KALLSYMS = lib.kernel.yes; - KGDB = lib.kernel.yes; - }; - } - ]; - })).overrideAttrs - (oldAttrs: { - dontStrip = true; - postInstall = oldAttrs.postInstall or "" + '' - cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist" - ''; - }) - ); - boot.kernelParams = [ "nokaslr" ]; - networking.useNetworkd = true; - virtualisation.qemu.options = [ "-s" ]; - environment.systemPackages = [ - pkgs.gdb - package # CH - cfg.runner - uvmsPkgs.taps - ]; - system.build.gdbScript = gdbScript; - systemd.services.taps = { - wantedBy = [ "multi-user.target" ]; - environment.TAPS_SOCK = "/run/taps/taps.sock"; - serviceConfig = { - UMask = "0007"; - ExecStart = "${getExe uvmsPkgs.taps} serve"; - RuntimeDirectory = "taps"; - DynamicUser = true; - AmbientCapabilities = [ - "CAP_NET_BIND_SERVICE" - "CAP_NET_ADMIN" - ]; - NoNewPrivileges = true; - }; - }; - }; - testScript = '' - machine.succeed("${getExe cfg.runner}") - ''; - } + uvms.cloud-hypervisor.argv = lib.mkBefore ( + [ + (lib.getExe pkgs.cloud-hypervisor) + "--cmdline=${lib.concatStringsSep " " cfg.cmdline}" + "--kernel=${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}" + "--initramfs=${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}" + "--vsock=cid=4,socket=vsock.sock" + "--api-socket=vmm.sock" + "--serial=tty" + "--console=null" + "--watchdog" + "--seccomp=true" + ] + ++ cfg.extraArgv ); - - # NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum - uvms.cloud-hypervisor.runner = - let - toolsClosure = pkgs.writeClosure [ - (lib.getBin pkgs.execline) - (lib.getBin pkgs.s6) - (lib.getBin package) - (lib.getBin pkgs.virtiofsd) - (lib.getBin pkgs.bubblewrap) - uvmsPkgs.taps - ]; - - superviseVm = getExe superviseVm'; - superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } '' - import os - import subprocess - import socket - from argparse import ArgumentParser - from contextlib import contextmanager, closing, ExitStack - - - parser = ArgumentParser("supervise-vm") - parser.add_argument("--vm") - parser.add_argument("--prefix", default="$HOME/uvms/$VM") - parser.add_argument("--sock", default="$PREFIX/supervisor.sock") - parser.add_argument("--vm-config") - - MSG_SIZE = 16 - ELB_DIR = "${lib.getBin pkgs.execline}/bin" # noqa: E501 - S6_DIR = "${lib.getBin pkgs.s6}/bin" # noqa: E501 - CH_DIR = "${lib.getBin package}/bin" # noqa: E501 - UTIL_LINUX_DIR = "${lib.getBin pkgs.util-linux}/bin" # noqa: E501 - SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder" # noqa: E501 - CH_PATH = CH_DIR + "/cloud-hypervisor" - CHR_PATH = CH_DIR + "/ch-remote" - TAPS_PATH = "${lib.getExe uvmsPkgs.taps}" # noqa: E501 - VIRTIOFSD_PATH = "${lib.getExe pkgs.virtiofsd}" # noqa: E501 - BWRAP_PATH = "${lib.getExe pkgs.bubblewrap}" # noqa: E501 - - with open("${toolsClosure}", mode="r") as f: # noqa: E501 - CLOSURE = [ - *(ln.rstrip() for ln in f.readlines()), - "${placeholder "out"}", # noqa: E501 - ] - - PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR, UTIL_LINUX_DIR]) - PASSTHRU_ENV = { - **{ - k: v - for k, v in os.environ.items() - if k.startswith("RUST") - or k.startswith("WAYLAND") - or k in [ - "TAPS_SOCK", - ] - }, - "HOME": os.environ.get("HOME", os.getcwd()), - "PATH": PASSTHRU_PATH, - } - - - def preprocess_args(args_mut): - keys = [ - k - for k, v - in args_mut._get_kwargs() - if isinstance(v, str)] - for k in keys: - v = getattr(args_mut, k) - if "$HOME" in v: - setattr( - args_mut, - k, - v.replace("$HOME", PASSTHRU_ENV["HOME"])) - for k in keys: - v = getattr(args_mut, k) - if "$VM" in v: - setattr(args_mut, k, v.replace("$VM", args.vm)) - for k in keys: - v = getattr(args_mut, k) - if "$PREFIX" in v: - setattr(args_mut, k, v.replace("$PREFIX", args.prefix)) - return args_mut - - - class Processes: - def __init__(self, prefix, vm, check=True, **defaults): - self.prefix = prefix - self.vm = vm - self.check = check - self.defaults = defaults - - def make_env(self): - return { - **PASSTHRU_ENV, - "PATH": PASSTHRU_PATH, - "PREFIX": self.prefix, - "VM": self.vm, - } - - def exec(self, *args, **kwargs): - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - kwargs["check"] = kwargs.get("check", self.check) - kwargs["env"] = kwargs.get("env", self.make_env()) - return subprocess.run( - [*args], - **self.defaults, - **kwargs) - - def execline(self, *args, **kwargs): - return exec( - "execlineb", "-c", "\n".join(args), - **self.defaults, - executable=ELB_DIR + "/execlineb", - **{ - "env": self.make_env(), - "check": self.check, - "cwd": self.prefix, - **kwargs, - }, - ) - - def popen(self, *args, **kwargs): - kwargs["pass_fds"] = kwargs.get("pass_fds", ()) - kwargs["env"] = kwargs.get("env", self.make_env()) - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - return subprocess.Popen( - args, - **kwargs, - ) - - @contextmanager - def bwrap( - self, - *bwrap_args, - - die_with_parent=True, - - # Based on the args from - # `host/rootfs/image/usr/bin/run-vmm` - unshare_all=True, - unshare_user=True, - unshare_ipc=None, - unshare_pid=None, - unshare_net=None, - unshare_uts=None, - unshare_cgroup_try=True, - bind=(), - dev_bind=("/dev/kvm", "/dev/vfio"), - dev="/dev", - proc="/proc", - ro_bind=( - "/etc", - "/sys", - "/proc/sys", - "/dev/null", - "/proc/kallsyms", - *CLOSURE), - ro_bind_extra=(), - remount_ro=("/proc/fs", "/proc/irq"), - tmpfs=("/dev/shm", "/tmp", "/var/tmp", "/proc/fs", "/proc/irq"), - tmpfs_extra=(), - - pass_fds=(2,), - **popen_kwargs): - - bwrap_args_sock, remote = socket.socketpair() - remote.set_inheritable(True) - bwrap_args_f = bwrap_args_sock.makefile("w") - with closing(bwrap_args_sock), closing(bwrap_args_f): - def print_arg(*args): - print(*args, file=bwrap_args_f, sep="\0", end="\0") - - if unshare_all: - print_arg("--unshare-all") - if unshare_user: - print_arg("--unshare-user") - if unshare_ipc: - print_arg("--unshare-ipc") - if unshare_pid: - print_arg("--unshare-pid") - if unshare_net: - print_arg("--unshare-net") - if unshare_uts: - print_arg("--unshare-uts") - if unshare_cgroup_try: - print_arg("--unshare-cgroup-try") - if die_with_parent: - print_arg("--die-with-parent") - - for p in bind: - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--bind", p1, p2) - for p in (*ro_bind, *ro_bind_extra): - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--ro-bind", p1, p2) - for p in dev_bind: - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--dev-bind", p1, p2) - for p in (*tmpfs, *tmpfs_extra): - print_arg("--tmpfs", p) - # Hunch: order might matter... - for p in remount_ro: - print_arg("--remount-ro", p) - - bwrap_args_f.flush() - - with closing(remote): - proc = self.popen( - "bwrap", "--args", str(remote.fileno()), *bwrap_args, - **popen_kwargs, - executable=BWRAP_PATH, - pass_fds=(*pass_fds, remote.fileno()), - ) - - with proc as p: - try: - yield p - finally: - try: - p.poll() - except: # noqa: E722 - pass - if p.returncode is None: - p.terminate() - p.wait() - - @contextmanager - def run_ch(self): - args = [ - SOCKETBINDER_PATH, - "-B", - self.prefix + "/vmm.sock", - CH_PATH, - "--api-socket", - "fd=0", - ] - p = self.popen( - *args, - shell=False, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - pass_fds=(2,)) - try: - p.wait(0.125) - needs_cleanup = False - except subprocess.TimeoutExpired: - needs_cleanup = True - if not os.path.exists(self.prefix + "/vmm.sock"): - raise RuntimeError(f"{self.prefix}/vmm.sock should exist by now") - if p.returncode is not None: - raise RuntimeError("CH exited early") - try: - yield p - finally: - try: - p.poll() - except: # noqa: E722 - pass - if p.returncode is None: - p.terminate() # CH handles SIG{INT,TERM}? - p.wait() - unlink_paths = [ - self.prefix + "/vmm.sock", - self.prefix + "/vmm.sock.lock", - self.prefix + "/vsock.sock", - ] if needs_cleanup else [] - for p in unlink_paths: - if os.path.exists(p): - os.remove(p) - - @contextmanager - def add_virtiofsd( - self, - root_dir, - tag, - ro=False, - subdirs=None, - extra_flags=("--posix-acl",)): - - assert os.path.exists(root_dir) - - sock_path = self.prefix + f"/virtiofsd-{tag}.sock" - # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - # NOTE: Nope. Virtiofsd actually expects a blocking socket - # s.setblocking(True) - - def rm_sock(): - if os.path.exists(sock_path): - os.remove(sock_path) - - with ExitStack() as cleanup: # noqa: F841 - # s.bind(sock_path.encode("utf8")) - # cleanup.enter_context(closing(s)) - cleanup.enter_context(defer(rm_sock)) - - args = [ - # If using bwrap(): - # "--argv0", "virtiofsd", - # "--uid", "1000", - # "--gid", "1000", - # "--", - "unshare", "-rUm", - "unshare", "--map-user", "1000", "--map-group", "1000", - VIRTIOFSD_PATH, - "--shared-dir", - root_dir, - "--tag", - tag, - - # "--fd", - # str(s.fileno()), - "--socket-path", - sock_path, - - # If relying on bwrap(): - # "--sandbox", - # "none", - ] - if ro: - args.append("--readonly") - kwargs = { - # If bwrap(): - # "bind": [], - # ("ro_bind_extra" if ro else "bind"): - # [*subdirs] - # if subdirs is not None - # else [root_dir], - - # "pass_fds": (2, s.fileno()), - } - proc_ctx = self.popen(*args, **kwargs) - with proc_ctx as p: - try: - try: - p.wait(0.125) - except subprocess.TimeoutExpired: - pass - if p.returncode is not None: - raise RuntimeError("virtiofsd exited too early") - yield p, sock_path - finally: - if p.returncode is None: - p.kill() - p.wait() - if os.path.exists(sock_path): - os.remove(sock_path) - - - @contextmanager - def defer(f): - try: - yield - finally: - f() - - - if __name__ == "__main__": - args, args_next = parser.parse_known_args() - preprocess_args(args) - - os.makedirs(args.prefix, exist_ok=True) - ps = Processes( - prefix=args.prefix, - vm=args.vm, - ) - - ch_remote = [ - "ch-remote", - "--api-socket", - args.prefix + "/vmm.sock", - ] - - with ExitStack() as cleanup: - ch = cleanup.enter_context(ps.run_ch()) - ps.exec(*ch_remote, "create", args.vm_config) - ps.exec( - TAPS_PATH, "pass", - *ch_remote, "add-net", - "id=wan,fd=3,mac=00:00:00:00:00:01") - - send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}" - os.makedirs(send_dir, exist_ok=True) - vfsd, vfsd_path = cleanup.enter_context( - ps.add_virtiofsd( - send_dir, - tag="send", - )) - ps.exec(*ch_remote, "add-fs", f"tag=send,socket={vfsd_path},id=send") - ps.exec(*ch_remote, "boot") - ps.exec(*ch_remote, "info") - try: - ch.wait() - except KeyboardInterrupt: - pass - ''; - in - writeElb "run-${hostName}" '' - ${superviseVm} --vm-config=${chSettingsFile} --vm=${hostName} - ''; + uvms.cloud-hypervisor.runner = pkgs.writeShellScriptBin "run-${config.networking.hostName}" '' + set -euo pipefail + GUESTNAME=${config.networking.hostName} + args=( + ${lib.concatMapStringsSep "\n" lib.escapeShellArg cfg.argv} + ) + mkdir -p "$HOME/uvms/$GUESTNAME" + cd "$HOME/uvms/$GUESTNAME" + cleanup() { + rm "$HOME/uvms/$GUESTNAME"/{vmm,vsock}.sock + } + exec -a "uuvm/$GUESTNAME" "''${args[@]}" + ''; } (lib.mkIf cfg.enable { boot.initrd.availableKernelModules = [ @@ -699,6 +103,12 @@ in } ) layers ); + uvms.cloud-hypervisor.argv = [ + "--memory=size=1536M,hotplug_size=1536M,hotplugged_size=512M,hotplug_method=virtio-mem,mergeable=on,shared=on" + "--cpus=boot=4" + "--disk" + ] + ++ map (img: "path=${img},readonly=true,id=${toString img.label}") layers; }) ]; }