diff --git a/examples/dummy.nix b/examples/dummy.nix index 7d8c813..4689af1 100644 --- a/examples/dummy.nix +++ b/examples/dummy.nix @@ -13,8 +13,6 @@ system.stateVersion = "25.11"; - networking.hostName = "dummy"; - vmapps.enable = true; _module.args.inputs = import ../npins; diff --git a/pkgs/ch-proxy/meson.build b/pkgs/ch-proxy/meson.build index 379f96f..e3976a2 100644 --- a/pkgs/ch-proxy/meson.build +++ b/pkgs/ch-proxy/meson.build @@ -1,9 +1,3 @@ project('ch-proxy', 'c') -pkg = import('pkgconfig') - -sendfd = library('sendfd', [ 'sendfd.c', 'sendfd.h' ], install: true) -pkg.generate(sendfd) -install_headers('sendfd.h') - -executable('ch-proxy', 'proxy.c', link_with: [sendfd], install: true) +executable('ch-proxy', 'proxy.c', install: true) diff --git a/pkgs/ch-proxy/package.nix b/pkgs/ch-proxy/package.nix index e94eec8..9fd3b21 100644 --- a/pkgs/ch-proxy/package.nix +++ b/pkgs/ch-proxy/package.nix @@ -8,7 +8,6 @@ stdenv.mkDerivation { pname = "ch-proxy"; version = "0.0.0"; - outputs = [ "out" "lib" ]; nativeBuildInputs = [ meson ninja @@ -20,8 +19,6 @@ stdenv.mkDerivation { fs.toSource { fileset = fs.unions [ ./proxy.c - ./sendfd.c - ./sendfd.h ./meson.build ]; root = ./.; diff --git a/pkgs/ch-proxy/proxy.c b/pkgs/ch-proxy/proxy.c index 46730e6..ed1dea0 100644 --- a/pkgs/ch-proxy/proxy.c +++ b/pkgs/ch-proxy/proxy.c @@ -9,9 +9,9 @@ #include -#include "sendfd.h" - +struct msghdr mk_msghdr(void); int ch_connect(const char*, const char*); +ssize_t send_fd(int, int); #define _WRITE_CONFIRM(fd, buf, buflen) {if (write((fd), (buf), (buflen)) != (buflen)) { perror("ch-proxy/write/partial write"); exit(EXIT_FAILURE); }} @@ -168,13 +168,19 @@ int main(int argc, char** argv) { exit(EXIT_FAILURE); } - if (send_fd(1, s, NULL) == -1) { + if (send_fd(1, s) == -1) { perror("ssh-vsock-proxy/main/send_fd"); return EXIT_FAILURE; } return 0; } +struct msghdr mk_msghdr(void) { /* returns a zero-filled struct msghdr by value */ + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + + return msg; +} int ch_connect(const char *path, const 
char *port) { int s = socket(AF_UNIX, SOCK_STREAM, 0); @@ -206,3 +212,38 @@ int ch_connect(const char *path, const char *port) { return s; } + +ssize_t send_fd(int dst_fd, int fd) { /* pass `fd` over `dst_fd` as SCM_RIGHTS ancillary data; returns the sendmsg() result */ + struct msghdr msg = mk_msghdr(); + + /* openssh expects to receive a dummy length=1 iovec? */ + char ch = 0; /* must be initialized: this byte is what sendmsg() transmits via vec */ + struct iovec vec; + vec.iov_base = &ch; + vec.iov_len = 1; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + + union { + struct cmsghdr align; + char buf[CMSG_SPACE(sizeof(int))]; + } u; + + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + struct cmsghdr *cmptr; + cmptr = CMSG_FIRSTHDR(&msg); + + if (cmptr == NULL) { + fprintf(stderr, "ch-proxy/send_fd/CMSG_FIRSTHDR: failed to initialize msg_control\n"); + exit(EXIT_FAILURE); + } + + cmptr->cmsg_len = CMSG_LEN(sizeof(int)); + cmptr->cmsg_level = SOL_SOCKET; + cmptr->cmsg_type = SCM_RIGHTS; + *((int*) CMSG_DATA(cmptr)) = fd; + + return (sendmsg(dst_fd, &msg, 0)); +} diff --git a/pkgs/ch-proxy/sendfd.c b/pkgs/ch-proxy/sendfd.c deleted file mode 100644 index b20e284..0000000 --- a/pkgs/ch-proxy/sendfd.c +++ /dev/null @@ -1,74 +0,0 @@ -#include "sendfd.h" -#include "sys/socket.h" /* cmsghdr */ -#include "stdio.h" /* perror */ - - -ssize_t send_fd(int dst_fd, int fd, const struct iovec *iov) { - struct msghdr msg = { 0 }; - - /* openssh expects to receive a dummy length=1 iovec? */ - char ch = 0; - struct iovec vecDefault = { 0 }; - vecDefault.iov_base = &ch; - vecDefault.iov_len = 1; - msg.msg_iov = iov == NULL ? 
&vecDefault : iov; - msg.msg_iovlen = 1; - - union { - struct cmsghdr align; - char buf[CMSG_SPACE(sizeof(int))]; - } u; - - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - struct cmsghdr *cmptr; - cmptr = CMSG_FIRSTHDR(&msg); - - if (cmptr == NULL) { - perror("ch-proxy/send_fd/CMSG_FIRSTHDR: failed to initialize msg_control\n"); - } - - cmptr->cmsg_len = CMSG_LEN(sizeof(int)); - cmptr->cmsg_level = SOL_SOCKET; - cmptr->cmsg_type = SCM_RIGHTS; - *((int*) CMSG_DATA(cmptr)) = fd; - - return (sendmsg(dst_fd, &msg, 0)); -} - -int recv_fd(int sock, int flags) { - int out = -1; - - struct msghdr msg = { 0 }; - struct cmsghdr *cmsg = NULL; - struct iovec iov = { 0 }; - char dummy = 0; - - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - iov.iov_base = &dummy; - iov.iov_len = sizeof(dummy); - - union { - struct cmsghdr align; - char buf[CMSG_SPACE(sizeof(int))]; - } u; - - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - int bytes = 0; - if ((bytes = recvmsg(sock, &msg, flags)) < 0) { - perror("recv_fd: recvmsg"); - return -1; - } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != SOL_SOCKET) { continue; } - if (cmsg->cmsg_type != SCM_RIGHTS) { continue; } - if (CMSG_LEN(cmsg) < sizeof(out)) { continue; } - out = *(int*)CMSG_DATA(cmsg); - } - return out; -} diff --git a/pkgs/ch-proxy/sendfd.h b/pkgs/ch-proxy/sendfd.h deleted file mode 100644 index fc1d2f8..0000000 --- a/pkgs/ch-proxy/sendfd.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _CH_PROXY_SENFD -#define _CH_PROXY_SENFD - -#include /* size_t */ -#include /* ssize_t */ -#include /* iovec */ - - -/* send_fd(chanFd, fd, *iov) - * - * chanFd: fd to sendmsg over; - * fd: fd to send; - * iov: extra data to send or NULL; - * - * returns: result of sendmsg, - * i.e. 
the number of bytes sent */ -ssize_t send_fd(int chanFd, int fd, const struct iovec *); - -/* recv_fd(chanFd, flags) - * - * chanFd: fd to recvmsg from; - * flags: recvmsg flags e.g. 0, or MSG_CMSG_CLOEXEC? - * - * returns: the received fd or -1 */ -int recv_fd(int chanFd, int flags); - -#endif /* _CH_PROXY_SENFD */ diff --git a/pkgs/taps/.envrc b/pkgs/taps/.envrc deleted file mode 100644 index 35f8c10..0000000 --- a/pkgs/taps/.envrc +++ /dev/null @@ -1 +0,0 @@ -use nix ../../ -A pkgs.taps diff --git a/pkgs/taps/main.c b/pkgs/taps/main.c deleted file mode 100644 index 3b276df..0000000 --- a/pkgs/taps/main.c +++ /dev/null @@ -1,342 +0,0 @@ -#define _GNU_SOURCE - -#include -#include /* secure_getenv */ -#include -#include -#include -#include -#include -#include -#include -#include -#include /* open, O_NONBLOCK, &c */ -#include -#include -#include -#include -#include -#include - -#define __UAPI_DEF_IF_IFNAMSIZ 1 -#include -#include -#include - -#include "sendfd.h" - -// From `man unix` -#define SUN_PATH_SZ 108 -#define N_CONNS 16 - -char *TEMP_PATHS[1024] = { 0 }; -int LAST_TEMP_PATH = -1; - -#define IFR_FLAGS_ALLOWED (IFF_NO_PI | IFF_TAP | IFF_TUN | IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_PERSIST) -#define IFR_FLAGS_DEFAULT (IFF_NO_PI | IFF_TAP | IFF_VNET_HDR | IFF_PERSIST) - -#define PTR_OR_DIE(expr) TRUE_OR_DIE((expr) != NULL) -#define DO_OR_DIE(expr) TRUE_OR_DIE((expr) != -1) -#define TRUE_OR_DIE(expr, ...) TRUE_OR_(EXIT_FAILURE, expr, __VA_ARGS__) -#define TRUE_OR_WARN(expr, ...) TRUE_OR_(0, expr, __VA_ARGS__) -#define TRUE_OR_(status, expr, ...) \ - do if (!(expr)) { \ - error(status, errno, "Failed assertion: " #expr "." 
__VA_ARGS__); \ - } while(false) - -struct allow_pattern { - // enum { USER = 1, GROUP = 2 } type; - // union { uid_t uid, gid_t gid } xid; - char *name; -}; -struct allow_patterns { - size_t n; - struct allow_pattern *patterns; -}; - -/* Running on the same host, not caring for alignment */ -struct tap_request { - short ifrFlags; /* 0 to use defaults: IFF_TAP | IFF_NO_PI | IFF_VNET_HDR */ - char name[IFNAMSIZ]; -}; - -struct tap_reply { - enum { OK = 0, AUTH_ERROR = 1 } status; - char name[IFNAMSIZ]; -}; - -int tuntap_alloc(char *dev, short openFlags, short ifrFlags, int *out_fd); - -bool match_mask(const char *test_addr, const char *expected_addr, const char *mask, int n) { - for (int octet = 0; octet < n; ++octet) { - if ((test_addr[octet] & mask[octet]) != expected_addr[octet]) { - return false; - } - } - return true; -} - -/* - * Adapted from spectrum's `mktuntap.c` (2019 Alyssa Ross - * GPL-2.0-only), which in turn adapts `tun_alloc` from - * `linux/Documentation/networking/tuntap.rst`. 
- * - * ifrFlags: IFF_TUN - TUN device (no Ethernet headers) - * IFF_TAP - TAP device - * - * IFF_NO_PI - Do not provide packet information - */ -int tuntap_alloc(char *dev, short openFlags, short ifrFlags, int *out_fd) { - struct ifreq ifr = { 0 }; - int fd = -1, err = 0; - - DO_OR_DIE(fd = open("/dev/net/tun", openFlags)); - - if (dev != NULL) { - int devLen = strlen(dev); - if (devLen >= IFNAMSIZ) { - /* If client requests a name, we do want the entire name to fit */ - errno = EINVAL; - return EINVAL; - } - strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1); - } - ifr.ifr_flags = ifrFlags; - - TRUE_OR_WARN((err = ioctl(fd, TUNSETIFF, (void *)&ifr)) == 0); - if (err != 0) { - close(fd); - return err; - } - - strncpy(dev, ifr.ifr_name, IFNAMSIZ); - *out_fd = fd; - - { - int sz = sizeof(struct virtio_net_hdr_v1); - DO_OR_DIE(ioctl(fd, TUNSETVNETHDRSZ, &sz)); - } - return 0; -} - -int acceptRequests(const char *requestsPath, const struct allow_patterns *patterns) { - int listener; - struct sockaddr_un addr; - const int t = 1; - - DO_OR_DIE(listener = socket(AF_UNIX, SOCK_SEQPACKET, 0)); - DO_OR_DIE(setsockopt(listener, SOL_SOCKET, SO_PASSCRED, &t, sizeof(t))); - - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, requestsPath, SUN_PATH_SZ - 1); - DO_OR_DIE (bind(listener, &addr, sizeof(addr))); - PTR_OR_DIE(TEMP_PATHS[++LAST_TEMP_PATH] = strdup(requestsPath)); - - DO_OR_DIE(listen(listener, N_CONNS)); - - for (;;) { - /* Already changed my mind about looking at ucred, but keeping the code around for now */ - int sock = -1; - struct ucred cred = { 0 }; - struct msghdr msg = { 0 }; - struct cmsghdr *cmsg = NULL; - struct iovec iov = { 0 }; - struct tap_request req = { 0 }; - - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - iov.iov_base = &req; - iov.iov_len = sizeof(struct tap_request); - - DO_OR_DIE((sock = accept(listener, NULL, NULL))); - - TRUE_OR_DIE(recvmsg(sock, &msg, 0) > 0); - req.name[IFNAMSIZ] = 0; - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = 
CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != SOL_SOCKET) { continue; } - if (cmsg->cmsg_type != SCM_CREDENTIALS) { continue; } - if (CMSG_LEN(cmsg) < sizeof(struct ucred)) { continue; } - memcpy(&cred, CMSG_DATA(cmsg), sizeof(struct ucred)); - break; - } - - if (req.ifrFlags == 0) { - req.ifrFlags = IFR_FLAGS_DEFAULT; - } - - bool allowed = false; - for (int i = 0; !allowed && i < patterns->n; ++i) { - bool ifnameOk = fnmatch(patterns->patterns[i].name, req.name, 0) == 0; - bool flagsOk = (req.ifrFlags & IFR_FLAGS_ALLOWED) == req.ifrFlags; - allowed = ifnameOk && flagsOk; - } - - struct tap_reply reply = { 0 }; - if (!allowed) { reply.status = AUTH_ERROR; } - if (allowed) { - /* O_CLOEXEC? */ - int fd = -1; - TRUE_OR_DIE(tuntap_alloc(req.name, O_RDWR | O_NONBLOCK, req.ifrFlags, &fd) == 0); - struct iovec iov = { 0 }; - iov.iov_base = &reply; - iov.iov_len = sizeof(struct tap_reply); - TRUE_OR_DIE(send_fd(sock, fd, &iov) > 0); - close(fd); - } - close(sock); - } - close(listener); -} - -struct allow_patterns parsePatterns(const char *raw) { - const size_t rawLen = strlen(raw); - - size_t nPatterns = 0; - for (int i = 0; i < rawLen; ++i) { - const int start = i; - if (isspace(raw[i])) { continue; } - for (; i < rawLen && !isspace(raw[i]); ++i) { } - if (start < i) { ++nPatterns; } - } - - struct allow_pattern *patterns = NULL; - PTR_OR_DIE(patterns = calloc(nPatterns, sizeof(struct allow_pattern))); - - int iPattern = 0; - for (int i = 0; i < rawLen; ++i) { - if (isspace(raw[i])) { continue; } - /* used to have per-group/per-user patterns, "u:$username:$pattern", &c - gone */ - { - const int start = i; - for (; i < rawLen && !isspace(raw[i]); ++i) { } - if (start < i) { - PTR_OR_DIE(patterns[iPattern].name = strndup(&raw[start], i - start)); - iPattern += 1; - } - } - } - struct allow_patterns out = { - .n = nPatterns, - .patterns = patterns - }; - return out; -} - -int get(const char *servePath, const char *ifname, short ifrFlags) { - /* TODO: sock: move out 
*/ - int sock; - struct sockaddr_un addr; - - DO_OR_DIE(sock = socket(AF_UNIX, SOCK_SEQPACKET, 0)); - - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, servePath, SUN_PATH_SZ - 1); - DO_OR_DIE (connect(sock, &addr, sizeof(addr))); - - struct msghdr msg = { 0 }; - struct cmsghdr *cmsg = NULL; - struct iovec iov = { 0 }; - struct tap_request req = { 0 }; - strncpy(req.name, ifname, IFNAMSIZ - 1); - req.ifrFlags = ifrFlags; - - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - iov.iov_base = &req; - iov.iov_len = sizeof(struct tap_request); - - TRUE_OR_DIE(sendmsg(sock, &msg, 0) > 0); - - int tunFd = -1; - DO_OR_DIE(tunFd = recv_fd(sock, 0)); - close(sock); - return tunFd; -} - -void cleanup(int signo, siginfo_t *info, void *_context) { - for (int i = 0; i <= LAST_TEMP_PATH; ++i) { - TRUE_OR_DIE(unlink(TEMP_PATHS[i]) != -1 || errno == ENOENT); - } - if (signo == SIGINT) { - exit(EXIT_SUCCESS); - } - errx(EXIT_FAILURE, "Exiting with signal %d", signo); -} - -/* skarlibs under ISC */ -int uncoe (int fd) -{ - int flags = fcntl(fd, F_GETFD, 0) ; - return flags < 0 ? flags : flags & FD_CLOEXEC ? 
fcntl(fd, F_SETFD, flags & ~FD_CLOEXEC) : 0 ; -} - -int main(int argc, char **argv) { - struct sigaction act = { 0 }; - act.sa_flags = SA_SIGINFO; - act.sa_sigaction = cleanup; - DO_OR_DIE(sigaction(SIGINT, &act, NULL)); - DO_OR_DIE(sigaction(SIGSEGV, &act, NULL)); - - bool cmdServe = false; - bool cmdPass = false; - char *ifname = "vt-%d"; - - char **rest = argv + 1; - char **end = argv + argc; - - TRUE_OR_DIE(argc > 1); - if (strcmp(rest[0], "serve") == 0) { - cmdServe = true; - ++rest; - } else if (strcmp(rest[0], "pass") == 0) { - cmdPass = true; - ++rest; - for (; rest != end && rest[0][0] == '-'; ++rest) { - if (strcmp(rest[0], "--")) { break; } - else if (strncmp(rest[0], "--ifname=", sizeof("--ifname="))) { - ifname = rest[0] + sizeof("--ifname="); - } - } - } else { - error(EINVAL, EINVAL, "no subcommand \"%s\"", rest[0]); - } - - int nextArgc = argc - (rest - argv); - char * const* nextArgv = rest; - - const char *patternsRaw = secure_getenv("TAPS_ALLOW"); - if (patternsRaw == NULL) { - patternsRaw = "*"; - } - - struct allow_patterns patterns = { 0 }; - if (cmdServe) { - PTR_OR_DIE((patterns = parsePatterns(patternsRaw)).patterns); - } - - const char *servePath = secure_getenv("TAPS_SOCK"); - if (servePath == NULL) { - servePath = "/run/taps/taps.sock"; - } - - if (cmdServe) { - acceptRequests(servePath, &patterns); - } else if (cmdPass) { - TRUE_OR_DIE(nextArgc > 0); - int fd = -1; - DO_OR_DIE(fd = get(servePath, ifname, 0)); - if (fd != 3) { - DO_OR_DIE(dup2(fd, 3)); - close(fd); - fd = 3; - } - uncoe(fd); - DO_OR_DIE(execvp(nextArgv[0], nextArgv)); - } else { - error(EINVAL, EINVAL, "subcommand args"); - } - - return 0; -} diff --git a/pkgs/taps/meson.build b/pkgs/taps/meson.build deleted file mode 100644 index 06057b4..0000000 --- a/pkgs/taps/meson.build +++ /dev/null @@ -1,4 +0,0 @@ -project('taps', 'c') - -sendfd = dependency('sendfd') -executable('taps', 'main.c', dependencies: [sendfd], install: true) diff --git a/pkgs/taps/package.nix 
b/pkgs/taps/package.nix deleted file mode 100644 index c666cd9..0000000 --- a/pkgs/taps/package.nix +++ /dev/null @@ -1,44 +0,0 @@ -{ - lib, - stdenv, - meson, - pkg-config, - rustc, - ninja, - ch-proxy, -}: - -stdenv.mkDerivation { - pname = "taps"; - version = "0.0.0"; - src = - let - fs = lib.fileset; - in - fs.toSource { - root = ./.; - fileset = fs.unions [ - ./meson.build - ./main.c - ]; - }; - - nativeBuildInputs = [ - ninja - meson - pkg-config - rustc - ]; - buildInputs = [ ch-proxy ]; -} -# { lib, rustPlatform }: -# -# rustPlatform.buildRustPackage { -# pname = "taps"; -# version = "0.0.0"; -# src = let fs = lib.filesystem; in fs.toSource { -# root = ./.; -# fileset = fs.unions [ -# ]; -# }; -# }; diff --git a/profiles/ch-runner.nix b/profiles/ch-runner.nix index f156705..fbb09a4 100644 --- a/profiles/ch-runner.nix +++ b/profiles/ch-runner.nix @@ -10,112 +10,21 @@ let cfg = config.uvms.cloud-hypervisor; - - inherit (config.networking) hostName; inherit (config.debug.closure.erofs) layers; - inherit (lib) - mkOption - types - concatMapStringsSep - getExe - getExe' - getBin - ; - - package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: { - patches = oldAttrs.patches or [ ] ++ [ - # ../patches/ch.patch - ]; - buildType = "debug"; - dontStrip = true; - }); - uvmsPkgs = pkgs.callPackage ../pkgs { }; - - chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings; - - uvmPrefix = "\${HOME}/uvms/${hostName}"; - vmmSock = "${uvmPrefix}/vmm.sock"; - elbPrefix = "${lib.getBin pkgs.execline}/bin"; - s6Prefix = "${lib.getBin pkgs.s6}/bin"; - writeElb = name: text: writeElb' name "-W" text; - writeElb' = - name: elArgs: text: - pkgs.writeTextFile { - inherit name; - destination = "/bin/${name}"; - executable = true; - text = '' - #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " "}${elArgs} - importas OLDPATH PATH - export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}" - ${text} - ''; - }; in { options = { 
uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)"; - uvms.cloud-hypervisor.runner = mkOption { - type = types.package; + uvms.cloud-hypervisor.runner = lib.mkOption { + type = lib.types.package; description = "A naive script for running this system in cloud-hypervisor"; }; - uvms.cloud-hypervisor.debugger = mkOption { - type = types.lazyAttrsOf types.anything; - description = "Same but you can debug the kernel"; + uvms.cloud-hypervisor.extraArgv = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ ]; }; - uvms.cloud-hypervisor.settingsFile = mkOption { - type = types.package; - default = chSettingsFile; - defaultText = "..."; - readOnly = true; - }; - uvms.cloud-hypervisor.settings = mkOption { - default = { }; - type = types.submodule { - freeformType = (pkgs.formats.json { }).type; - options = { - payload = { - cmdline = mkOption { type = types.str; }; - kernel = mkOption { type = types.str; }; - initramfs = mkOption { - type = types.str; - default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}"; - }; - }; - vsock = { - cid = mkOption { - type = types.int; - default = 4; - }; - socket = mkOption { - type = types.str; - default = "vsock.sock"; - }; - }; - "api-socket" = mkOption { - type = types.str; - default = "vmm.sock"; - }; - "serial".mode = mkOption { - type = types.str; - default = "File"; - }; - "serial".file = mkOption { - type = types.nullOr types.str; - default = "serial"; - }; - "console".mode = mkOption { - type = types.str; - default = "Pty"; - }; - "console".file = mkOption { - type = types.nullOr types.str; - default = null; - }; - # "watchdog" = true; - # "seccomp" = true; - }; - }; + uvms.cloud-hypervisor.argv = lib.mkOption { + type = lib.types.listOf lib.types.str; }; uvms.cloud-hypervisor.extraCmdline = lib.mkOption { type = lib.types.listOf lib.types.str; @@ -136,539 +45,34 @@ in }; config = lib.mkMerge [ { - uvms.cloud-hypervisor.settings = { - payload 
= { - cmdline = lib.concatStringsSep " " cfg.cmdline; - kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}"; - }; - disks = map (img: { - path = img; - readonly = true; - id = toString img.label; - }) layers; - memory = { - size = 1536 * 1048576; - shared = true; - mergeable = true; - # hotplugged_size = 512 * 1048576; - # hotplugd_size = 1536 * 1048576; - # hotplug_method = "virtio-mem" - }; - cpus = { - boot_vcpus = 4; - max_vcpus = 4; - }; - }; - - uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest ( - { config, ... }: - { - name = "test-run-${hostName}"; - passthru = rec { - inherit (config.nodes.machine.system.build) gdbScript; - inherit (config.nodes.machine.boot.kernelPackages) kernel; - kernelSrc = pkgs.srcOnly kernel; - }; - nodes.machine = - { config, ... }: - let - kernel = config.boot.kernelPackages.kernel; - kernelSrc = pkgs.srcOnly kernel; - gdbScript = writeElb "attach-gdb" '' - if { rm -rf /tmp/gdb } - if { mkdir -p /tmp/gdb/kos } - cd /tmp/gdb - if { - elglob -0 files ${kernelSrc}/* - forx -E f { $files } - ln -s $f ./ - } - if { mkdir -p build } - cd build - if { - forx -E pattern { - ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun* - ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap* - } - elglob -0 files $pattern - forx -E f { $files } - if { cp $f . 
} - backtick -E COMPRESSED { basename $f } - xz -d $COMPRESSED - } - elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb - if { - if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts } - mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py - } - ${getExe pkgs.gdb} - -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")" - -ex "target remote :1234" - -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py" - -ex "lx-symbols" - ${kernel.dev}/vmlinux - ''; - in - { - boot.kernelPackages = pkgs.linuxPackagesFor ( - (pkgs.linux.override (oldArgs: { - # extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [ - # "scripts_gdb" - # ]; - kernelPatches = oldArgs.kernelPatches or [ ] ++ [ - { - name = "debug"; - patch = null; - structuredExtraConfig = { - GDB_SCRIPTS = lib.kernel.yes; - DEBUG_INFO = lib.kernel.yes; - DEBUG_INFO_REDUCED = lib.kernel.no; - # FRAME_POINTER = lib.kernel.yes; # "unused option"??? - KALLSYMS = lib.kernel.yes; - KGDB = lib.kernel.yes; - }; - } - ]; - })).overrideAttrs - (oldAttrs: { - dontStrip = true; - postInstall = oldAttrs.postInstall or "" + '' - cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist" - ''; - }) - ); - boot.kernelParams = [ "nokaslr" ]; - networking.useNetworkd = true; - virtualisation.qemu.options = [ "-s" ]; - environment.systemPackages = [ - pkgs.gdb - package # CH - cfg.runner - uvmsPkgs.taps - ]; - system.build.gdbScript = gdbScript; - systemd.services.taps = { - wantedBy = [ "multi-user.target" ]; - environment.TAPS_SOCK = "/run/taps/taps.sock"; - serviceConfig = { - UMask = "0007"; - ExecStart = "${getExe uvmsPkgs.taps} serve"; - RuntimeDirectory = "taps"; - DynamicUser = true; - AmbientCapabilities = [ - "CAP_NET_BIND_SERVICE" - "CAP_NET_ADMIN" - ]; - NoNewPrivileges = true; - }; - }; - }; - testScript = '' - machine.succeed("${getExe cfg.runner}") - ''; - } + 
uvms.cloud-hypervisor.argv = lib.mkBefore ( + [ + (lib.getExe pkgs.cloud-hypervisor) + "--cmdline=${lib.concatStringsSep " " cfg.cmdline}" + "--kernel=${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}" + "--initramfs=${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}" + "--vsock=cid=4,socket=vsock.sock" + "--api-socket=vmm.sock" + "--serial=tty" + "--console=null" + "--watchdog" + "--seccomp=true" + ] + ++ cfg.extraArgv ); - - # NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum - uvms.cloud-hypervisor.runner = - let - toolsClosure = pkgs.writeClosure [ - (lib.getBin pkgs.execline) - (lib.getBin pkgs.s6) - (lib.getBin package) - (lib.getBin pkgs.virtiofsd) - (lib.getBin pkgs.bubblewrap) - uvmsPkgs.taps - ]; - - superviseVm = getExe superviseVm'; - superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } '' - import os - import subprocess - import socket - from argparse import ArgumentParser - from contextlib import contextmanager, closing, ExitStack - - - parser = ArgumentParser("supervise-vm") - parser.add_argument("--vm") - parser.add_argument("--prefix", default="$HOME/uvms/$VM") - parser.add_argument("--sock", default="$PREFIX/supervisor.sock") - parser.add_argument("--vm-config") - - MSG_SIZE = 16 - ELB_DIR = "${lib.getBin pkgs.execline}/bin" # noqa: E501 - S6_DIR = "${lib.getBin pkgs.s6}/bin" # noqa: E501 - CH_DIR = "${lib.getBin package}/bin" # noqa: E501 - UTIL_LINUX_DIR = "${lib.getBin pkgs.util-linux}/bin" # noqa: E501 - SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder" # noqa: E501 - CH_PATH = CH_DIR + "/cloud-hypervisor" - CHR_PATH = CH_DIR + "/ch-remote" - TAPS_PATH = "${lib.getExe uvmsPkgs.taps}" # noqa: E501 - VIRTIOFSD_PATH = "${lib.getExe pkgs.virtiofsd}" # noqa: E501 - BWRAP_PATH = "${lib.getExe pkgs.bubblewrap}" # noqa: E501 - - with open("${toolsClosure}", mode="r") as f: # noqa: E501 - CLOSURE = [ - 
*(ln.rstrip() for ln in f.readlines()), - "${placeholder "out"}", # noqa: E501 - ] - - PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR, UTIL_LINUX_DIR]) - PASSTHRU_ENV = { - **{ - k: v - for k, v in os.environ.items() - if k.startswith("RUST") - or k.startswith("WAYLAND") - or k in [ - "TAPS_SOCK", - ] - }, - "HOME": os.environ.get("HOME", os.getcwd()), - "PATH": PASSTHRU_PATH, - } - - - def preprocess_args(args_mut): - keys = [ - k - for k, v - in args_mut._get_kwargs() - if isinstance(v, str)] - for k in keys: - v = getattr(args_mut, k) - if "$HOME" in v: - setattr( - args_mut, - k, - v.replace("$HOME", PASSTHRU_ENV["HOME"])) - for k in keys: - v = getattr(args_mut, k) - if "$VM" in v: - setattr(args_mut, k, v.replace("$VM", args.vm)) - for k in keys: - v = getattr(args_mut, k) - if "$PREFIX" in v: - setattr(args_mut, k, v.replace("$PREFIX", args.prefix)) - return args_mut - - - class Processes: - def __init__(self, prefix, vm, check=True, **defaults): - self.prefix = prefix - self.vm = vm - self.check = check - self.defaults = defaults - - def make_env(self): - return { - **PASSTHRU_ENV, - "PATH": PASSTHRU_PATH, - "PREFIX": self.prefix, - "VM": self.vm, - } - - def exec(self, *args, **kwargs): - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - kwargs["check"] = kwargs.get("check", self.check) - kwargs["env"] = kwargs.get("env", self.make_env()) - return subprocess.run( - [*args], - **self.defaults, - **kwargs) - - def execline(self, *args, **kwargs): - return exec( - "execlineb", "-c", "\n".join(args), - **self.defaults, - executable=ELB_DIR + "/execlineb", - **{ - "env": self.make_env(), - "check": self.check, - "cwd": self.prefix, - **kwargs, - }, - ) - - def popen(self, *args, **kwargs): - kwargs["pass_fds"] = kwargs.get("pass_fds", ()) - kwargs["env"] = kwargs.get("env", self.make_env()) - kwargs["cwd"] = kwargs.get("cwd", self.prefix) - return subprocess.Popen( - args, - **kwargs, - ) - - @contextmanager - def bwrap( - self, - *bwrap_args, - - 
die_with_parent=True, - - # Based on the args from - # `host/rootfs/image/usr/bin/run-vmm` - unshare_all=True, - unshare_user=True, - unshare_ipc=None, - unshare_pid=None, - unshare_net=None, - unshare_uts=None, - unshare_cgroup_try=True, - bind=(), - dev_bind=("/dev/kvm", "/dev/vfio"), - dev="/dev", - proc="/proc", - ro_bind=( - "/etc", - "/sys", - "/proc/sys", - "/dev/null", - "/proc/kallsyms", - *CLOSURE), - ro_bind_extra=(), - remount_ro=("/proc/fs", "/proc/irq"), - tmpfs=("/dev/shm", "/tmp", "/var/tmp", "/proc/fs", "/proc/irq"), - tmpfs_extra=(), - - pass_fds=(2,), - **popen_kwargs): - - bwrap_args_sock, remote = socket.socketpair() - remote.set_inheritable(True) - bwrap_args_f = bwrap_args_sock.makefile("w") - with closing(bwrap_args_sock), closing(bwrap_args_f): - def print_arg(*args): - print(*args, file=bwrap_args_f, sep="\0", end="\0") - - if unshare_all: - print_arg("--unshare-all") - if unshare_user: - print_arg("--unshare-user") - if unshare_ipc: - print_arg("--unshare-ipc") - if unshare_pid: - print_arg("--unshare-pid") - if unshare_net: - print_arg("--unshare-net") - if unshare_uts: - print_arg("--unshare-uts") - if unshare_cgroup_try: - print_arg("--unshare-cgroup-try") - if die_with_parent: - print_arg("--die-with-parent") - - for p in bind: - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--bind", p1, p2) - for p in (*ro_bind, *ro_bind_extra): - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--ro-bind", p1, p2) - for p in dev_bind: - p1, p2 = (p, p) if isinstance(p, str) else p - print_arg("--dev-bind", p1, p2) - for p in (*tmpfs, *tmpfs_extra): - print_arg("--tmpfs", p) - # Hunch: order might matter... 
- for p in remount_ro: - print_arg("--remount-ro", p) - - bwrap_args_f.flush() - - with closing(remote): - proc = self.popen( - "bwrap", "--args", str(remote.fileno()), *bwrap_args, - **popen_kwargs, - executable=BWRAP_PATH, - pass_fds=(*pass_fds, remote.fileno()), - ) - - with proc as p: - try: - yield p - finally: - try: - p.poll() - except: # noqa: E722 - pass - if p.returncode is None: - p.terminate() - p.wait() - - @contextmanager - def run_ch(self): - args = [ - SOCKETBINDER_PATH, - "-B", - self.prefix + "/vmm.sock", - CH_PATH, - "--api-socket", - "fd=0", - ] - p = self.popen( - *args, - shell=False, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - pass_fds=(2,)) - try: - p.wait(0.125) - needs_cleanup = False - except subprocess.TimeoutExpired: - needs_cleanup = True - if not os.path.exists(self.prefix + "/vmm.sock"): - raise RuntimeError(f"{self.prefix}/vmm.sock should exist by now") - if p.returncode is not None: - raise RuntimeError("CH exited early") - try: - yield p - finally: - try: - p.poll() - except: # noqa: E722 - pass - if p.returncode is None: - p.terminate() # CH handles SIG{INT,TERM}? - p.wait() - unlink_paths = [ - self.prefix + "/vmm.sock", - self.prefix + "/vmm.sock.lock", - self.prefix + "/vsock.sock", - ] if needs_cleanup else [] - for p in unlink_paths: - if os.path.exists(p): - os.remove(p) - - @contextmanager - def add_virtiofsd( - self, - root_dir, - tag, - ro=False, - subdirs=None, - extra_flags=("--posix-acl",)): - - assert os.path.exists(root_dir) - - sock_path = self.prefix + f"/virtiofsd-{tag}.sock" - # s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - # NOTE: Nope. 
Virtiofsd actually expects a blocking socket - # s.setblocking(True) - - def rm_sock(): - if os.path.exists(sock_path): - os.remove(sock_path) - - with ExitStack() as cleanup: # noqa: F841 - # s.bind(sock_path.encode("utf8")) - # cleanup.enter_context(closing(s)) - cleanup.enter_context(defer(rm_sock)) - - args = [ - # If using bwrap(): - # "--argv0", "virtiofsd", - # "--uid", "1000", - # "--gid", "1000", - # "--", - "unshare", "-rUm", - "unshare", "--map-user", "1000", "--map-group", "1000", - VIRTIOFSD_PATH, - "--shared-dir", - root_dir, - "--tag", - tag, - - # "--fd", - # str(s.fileno()), - "--socket-path", - sock_path, - - # If relying on bwrap(): - # "--sandbox", - # "none", - ] - if ro: - args.append("--readonly") - kwargs = { - # If bwrap(): - # "bind": [], - # ("ro_bind_extra" if ro else "bind"): - # [*subdirs] - # if subdirs is not None - # else [root_dir], - - # "pass_fds": (2, s.fileno()), - } - proc_ctx = self.popen(*args, **kwargs) - with proc_ctx as p: - try: - try: - p.wait(0.125) - except subprocess.TimeoutExpired: - pass - if p.returncode is not None: - raise RuntimeError("virtiofsd exited too early") - yield p, sock_path - finally: - if p.returncode is None: - p.kill() - p.wait() - if os.path.exists(sock_path): - os.remove(sock_path) - - - @contextmanager - def defer(f): - try: - yield - finally: - f() - - - if __name__ == "__main__": - args, args_next = parser.parse_known_args() - preprocess_args(args) - - os.makedirs(args.prefix, exist_ok=True) - ps = Processes( - prefix=args.prefix, - vm=args.vm, - ) - - ch_remote = [ - "ch-remote", - "--api-socket", - args.prefix + "/vmm.sock", - ] - - with ExitStack() as cleanup: - ch = cleanup.enter_context(ps.run_ch()) - ps.exec(*ch_remote, "create", args.vm_config) - ps.exec( - TAPS_PATH, "pass", - *ch_remote, "add-net", - "id=wan,fd=3,mac=00:00:00:00:00:01") - - send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}" - os.makedirs(send_dir, exist_ok=True) - vfsd, vfsd_path = cleanup.enter_context( - 
ps.add_virtiofsd( - send_dir, - tag="send", - )) - ps.exec(*ch_remote, "add-fs", f"tag=send,socket={vfsd_path},id=send") - ps.exec(*ch_remote, "boot") - ps.exec(*ch_remote, "info") - try: - ch.wait() - except KeyboardInterrupt: - pass - ''; - in - writeElb "run-${hostName}" '' - ${superviseVm} --vm-config=${chSettingsFile} --vm=${hostName} - ''; + uvms.cloud-hypervisor.runner = pkgs.writeShellScriptBin "run-${config.networking.hostName}" '' + set -euo pipefail + GUESTNAME=${config.networking.hostName} + args=( + ${lib.concatMapStringsSep "\n" lib.escapeShellArg cfg.argv} + ) + mkdir -p "$HOME/uvms/$GUESTNAME" + cd "$HOME/uvms/$GUESTNAME" + cleanup() { + rm "$HOME/uvms/$GUESTNAME"/{vmm,vsock}.sock + } + exec -a "uuvm/$GUESTNAME" "''${args[@]}" + ''; } (lib.mkIf cfg.enable { boot.initrd.availableKernelModules = [ @@ -699,6 +103,12 @@ in } ) layers ); + uvms.cloud-hypervisor.argv = [ + "--memory=size=1536M,hotplug_size=1536M,hotplugged_size=512M,hotplug_method=virtio-mem,mergeable=on,shared=on" + "--cpus=boot=4" + "--disk" + ] + ++ map (img: "path=${img},readonly=true,id=${toString img.label}") layers; }) ]; } diff --git a/profiles/uvms-guest.nix b/profiles/uvms-guest.nix index 281f343..e8c307d 100644 --- a/profiles/uvms-guest.nix +++ b/profiles/uvms-guest.nix @@ -41,7 +41,6 @@ in volumes = [ { image = "swapfile.img"; - serial = "swapfiles"; mountPoint = "/var/swapfiles"; size = 1024; } diff --git a/profiles/uvms-users.nix b/profiles/uvms-users.nix index e7bbacf..e75ac8f 100644 --- a/profiles/uvms-users.nix +++ b/profiles/uvms-users.nix @@ -29,6 +29,7 @@ in }; config = mergeIf cfg.enable [ { + services.getty.autologinUser = "user"; security.sudo.wheelNeedsPassword = false; users.mutableUsers = false; users.users.user = { diff --git a/shell.nix b/shell.nix deleted file mode 100644 index f8bb9a7..0000000 --- a/shell.nix +++ /dev/null @@ -1,13 +0,0 @@ -with import { }; - -mkShell.override { stdenv = stdenvNoCC; } { - packages = map lib.getBin [ - cloud-hypervisor - 
virtiofsd - crosvm # virtio-gpu - npins - ] ++ [ - man-pages - linux-manual - ]; -}