Compare commits

..

No commits in common. "wip/taps" and "master" have entirely different histories.

14 changed files with 86 additions and 1151 deletions

View file

@ -13,8 +13,6 @@
system.stateVersion = "25.11";
networking.hostName = "dummy";
vmapps.enable = true;
_module.args.inputs = import ../npins;

View file

@ -1,9 +1,3 @@
project('ch-proxy', 'c')
pkg = import('pkgconfig')
sendfd = library('sendfd', [ 'sendfd.c', 'sendfd.h' ], install: true)
pkg.generate(sendfd)
install_headers('sendfd.h')
executable('ch-proxy', 'proxy.c', link_with: [sendfd], install: true)
executable('ch-proxy', 'proxy.c', install: true)

View file

@ -8,7 +8,6 @@
stdenv.mkDerivation {
pname = "ch-proxy";
version = "0.0.0";
outputs = [ "out" "lib" ];
nativeBuildInputs = [
meson
ninja
@ -20,8 +19,6 @@ stdenv.mkDerivation {
fs.toSource {
fileset = fs.unions [
./proxy.c
./sendfd.c
./sendfd.h
./meson.build
];
root = ./.;

View file

@ -9,9 +9,9 @@
#include <linux/vm_sockets.h>
#include "sendfd.h"
struct msghdr mk_msghdr();
int ch_connect(const char*, const char*);
ssize_t send_fd(int, int);
#define _WRITE_CONFIRM(fd, buf, buflen) {if (write((fd), (buf), (buflen)) != (buflen)) { perror("ch-proxy/write/partial write"); exit(EXIT_FAILURE); }}
@ -168,13 +168,19 @@ int main(int argc, char** argv) {
exit(EXIT_FAILURE);
}
if (send_fd(1, s, NULL) == -1) {
if (send_fd(1, s) == -1) {
perror("ssh-vsock-proxy/main/send_fd");
return EXIT_FAILURE;
}
return 0;
}
/* Return a msghdr whose bytes are all zero, for the caller to fill in. */
struct msghdr mk_msghdr() {
    struct msghdr blank;

    (void) memset(&blank, 0, sizeof blank);
    return blank;
}
int ch_connect(const char *path, const char *port) {
int s = socket(AF_UNIX, SOCK_STREAM, 0);
@ -206,3 +212,38 @@ int ch_connect(const char *path, const char *port) {
return s;
}
/*
 * send_fd: pass the open descriptor `fd` to the peer of `dst_fd` as
 * SCM_RIGHTS ancillary data.
 *
 * dst_fd: socket to sendmsg() over;
 * fd:     descriptor to hand over.
 *
 * returns: the result of sendmsg(), i.e. the number of payload bytes sent
 *          (1 on success) or -1 with errno set.
 *
 * Exits the process if the control buffer cannot be initialised.
 */
ssize_t send_fd(int dst_fd, int fd) {
    struct msghdr msg = { 0 };

    /* openssh expects to receive a dummy length=1 iovec? */
    /* Initialised so that no indeterminate stack byte is put on the wire. */
    char ch = 0;
    struct iovec vec;
    vec.iov_base = &ch;
    vec.iov_len = 1;
    msg.msg_iov = &vec;
    msg.msg_iovlen = 1;

    /* The union forces cmsghdr alignment on the control buffer. */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } u;
    msg.msg_control = u.buf;
    msg.msg_controllen = sizeof(u.buf);

    struct cmsghdr *cmptr = CMSG_FIRSTHDR(&msg);
    if (cmptr == NULL) {
        fprintf(stderr, "ch-proxy/send_fd/CMSG_FIRSTHDR: failed to initialize msg_control\n");
        exit(EXIT_FAILURE);
    }
    cmptr->cmsg_len = CMSG_LEN(sizeof(int));
    cmptr->cmsg_level = SOL_SOCKET;
    cmptr->cmsg_type = SCM_RIGHTS;
    /* memcpy instead of an int* cast: CMSG_DATA carries no alignment promise. */
    memcpy(CMSG_DATA(cmptr), &fd, sizeof(fd));

    return sendmsg(dst_fd, &msg, 0);
}

View file

@ -1,74 +0,0 @@
#include "sendfd.h"
#include "sys/socket.h" /* cmsghdr */
#include "stdio.h" /* perror */
/*
 * send_fd: pass the open descriptor `fd` to the peer of `dst_fd` as
 * SCM_RIGHTS ancillary data.
 *
 * dst_fd: socket to sendmsg() over;
 * fd:     descriptor to hand over;
 * iov:    a single iovec of payload to send along, or NULL to send one
 *         dummy zero byte.
 *
 * returns: the result of sendmsg() (number of payload bytes sent), or -1
 *          if the control header could not be set up or sendmsg() failed.
 */
ssize_t send_fd(int dst_fd, int fd, const struct iovec *iov) {
    struct msghdr msg = { 0 };

    /* openssh expects to receive a dummy length=1 iovec? */
    char ch = 0;
    struct iovec vecDefault = { 0 };
    vecDefault.iov_base = &ch;
    vecDefault.iov_len = 1;

    /* msghdr.msg_iov is not const-qualified, hence the explicit cast. */
    msg.msg_iov = iov == NULL ? &vecDefault : (struct iovec *) iov;
    msg.msg_iovlen = 1;

    /* The union forces cmsghdr alignment on the control buffer. */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } u;
    msg.msg_control = u.buf;
    msg.msg_controllen = sizeof(u.buf);

    struct cmsghdr *cmptr = CMSG_FIRSTHDR(&msg);
    if (cmptr == NULL) {
        /* errno is not meaningful here, so report without perror() and
         * bail out instead of dereferencing the NULL header below. */
        fprintf(stderr, "ch-proxy/send_fd/CMSG_FIRSTHDR: failed to initialize msg_control\n");
        return -1;
    }
    cmptr->cmsg_len = CMSG_LEN(sizeof(int));
    cmptr->cmsg_level = SOL_SOCKET;
    cmptr->cmsg_type = SCM_RIGHTS;
    *((int*) CMSG_DATA(cmptr)) = fd;

    return sendmsg(dst_fd, &msg, 0);
}
/*
 * recv_fd: receive one message from `sock` and extract a descriptor passed
 * as SCM_RIGHTS ancillary data.
 *
 * sock:  socket to recvmsg() from;
 * flags: recvmsg() flags.
 *
 * returns: the received descriptor, or -1 if recvmsg() failed or the
 *          message carried no usable SCM_RIGHTS control message.
 *
 * At most one byte of regular payload is read into a dummy buffer.
 */
int recv_fd(int sock, int flags) {
    int out = -1;
    struct msghdr msg = { 0 };
    struct cmsghdr *cmsg = NULL;
    struct iovec iov = { 0 };
    char dummy = 0;

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    iov.iov_base = &dummy;
    iov.iov_len = sizeof(dummy);

    /* The union forces cmsghdr alignment on the control buffer. */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } u;
    msg.msg_control = u.buf;
    msg.msg_controllen = sizeof(u.buf);

    if (recvmsg(sock, &msg, flags) < 0) {
        perror("recv_fd: recvmsg");
        return -1;
    }

    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_level != SOL_SOCKET) { continue; }
        if (cmsg->cmsg_type != SCM_RIGHTS) { continue; }
        /* CMSG_LEN() maps a payload size to a header length; the length
         * actually received lives in cmsg->cmsg_len. */
        if (cmsg->cmsg_len < CMSG_LEN(sizeof(out))) { continue; }
        out = *(int*)CMSG_DATA(cmsg);
    }
    return out;
}

View file

@ -1,27 +0,0 @@
/* Public interface of the sendfd helper library: passing open file
 * descriptors between processes over a socket using SCM_RIGHTS. */
#ifndef _CH_PROXY_SENFD
#define _CH_PROXY_SENFD
#include <stddef.h> /* size_t */
#include <sys/types.h> /* ssize_t */
#include <sys/uio.h> /* iovec */
/* send_fd(chanFd, fd, iov)
 *
 * Sends `fd` as ancillary data in a single sendmsg() call.
 *
 * chanFd: socket to sendmsg over;
 * fd: descriptor to send;
 * iov: one iovec of extra payload to send, or NULL to send a single
 *      dummy byte instead (exactly one iovec is used either way);
 *
 * returns: the result of sendmsg,
 * i.e. the number of payload bytes sent, or -1 on error */
ssize_t send_fd(int chanFd, int fd, const struct iovec *);
/* recv_fd(chanFd, flags)
 *
 * Receives one message and extracts a descriptor from its SCM_RIGHTS
 * ancillary data; at most one byte of regular payload is read.
 *
 * chanFd: socket to recvmsg from;
 * flags: recvmsg flags e.g. 0, or MSG_CMSG_CLOEXEC?
 *
 * returns: the received fd, or -1 if recvmsg failed or no descriptor
 *          was attached to the message */
int recv_fd(int chanFd, int flags);
#endif /* _CH_PROXY_SENFD */

View file

@ -1 +0,0 @@
use nix ../../ -A pkgs.taps

View file

@ -1,342 +0,0 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h> /* secure_getenv */
#include <sys/socket.h>
#include <unistd.h>
#include <sys/un.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>
#include <pwd.h>
#include <fnmatch.h>
#include <fcntl.h> /* open, O_NONBLOCK, &c */
#include <err.h>
#include <error.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <signal.h>
#include <fcntl.h>
#define __UAPI_DEF_IF_IFNAMSIZ 1
#include <linux/if_tun.h>
#include <linux/if.h>
#include <linux/virtio_net.h>
#include "sendfd.h"
// From `man unix`: size of sockaddr_un.sun_path
#define SUN_PATH_SZ 108
// Backlog passed to listen()
#define N_CONNS 16
// Filesystem paths to unlink() from the signal handler; LAST_TEMP_PATH is
// the index of the last used slot (-1 when empty).
char *TEMP_PATHS[1024] = { 0 };
int LAST_TEMP_PATH = -1;
// Every ifr_flags bit a client may request; requests with other bits are refused.
#define IFR_FLAGS_ALLOWED (IFF_NO_PI | IFF_TAP | IFF_TUN | IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_PERSIST)
// Flags substituted when a request carries ifrFlags == 0.
#define IFR_FLAGS_DEFAULT (IFF_NO_PI | IFF_TAP | IFF_VNET_HDR | IFF_PERSIST)
// Assertion helpers built on error(3):
//   PTR_OR_DIE(expr)  - exit unless expr is non-NULL
//   DO_OR_DIE(expr)   - exit unless expr is not -1
//   TRUE_OR_DIE(expr) - exit with EXIT_FAILURE unless expr is true
//   TRUE_OR_WARN(expr)- report but keep running (status 0 makes error() return)
// The stringified expression becomes part of the message, and any extra
// macro argument is pasted directly after the format literal, so it has to
// be a string literal itself.
#define PTR_OR_DIE(expr) TRUE_OR_DIE((expr) != NULL)
#define DO_OR_DIE(expr) TRUE_OR_DIE((expr) != -1)
#define TRUE_OR_DIE(expr, ...) TRUE_OR_(EXIT_FAILURE, expr, __VA_ARGS__)
#define TRUE_OR_WARN(expr, ...) TRUE_OR_(0, expr, __VA_ARGS__)
#define TRUE_OR_(status, expr, ...) \
do if (!(expr)) { \
error(status, errno, "Failed assertion: " #expr "." __VA_ARGS__); \
} while(false)
/* One allow-list entry: an fnmatch() pattern for interface names. */
struct allow_pattern {
// enum { USER = 1, GROUP = 2 } type;
// union { uid_t uid, gid_t gid } xid;
char *name;
};
/* Array of `n` allow-list entries. */
struct allow_patterns {
size_t n;
struct allow_pattern *patterns;
};
/* Request sent by a client to the server socket. */
/* Running on the same host, not caring for alignment */
struct tap_request {
short ifrFlags; /* 0 to use IFR_FLAGS_DEFAULT: IFF_NO_PI | IFF_TAP | IFF_VNET_HDR | IFF_PERSIST */
char name[IFNAMSIZ];
};
/* Reply payload that accompanies the passed descriptor. */
struct tap_reply {
enum { OK = 0, AUTH_ERROR = 1 } status;
char name[IFNAMSIZ];
};
/* Defined below; see the comment on the definition. */
int tuntap_alloc(char *dev, short openFlags, short ifrFlags, int *out_fd);
/*
 * Compare the first `n` bytes of `test_addr`, each ANDed with the matching
 * byte of `mask`, against `expected_addr`. Returns true only if every
 * masked byte is equal; an `n` of zero or less trivially matches.
 */
bool match_mask(const char *test_addr, const char *expected_addr, const char *mask, int n) {
    int idx = 0;

    while (idx < n) {
        const bool same = (test_addr[idx] & mask[idx]) == expected_addr[idx];
        if (!same) {
            return false;
        }
        idx++;
    }
    return true;
}
/*
 * Adapted from spectrum's `mktuntap.c` (2019 Alyssa Ross <hi@alyssa.is>
 * GPL-2.0-only), which in turn adapts `tun_alloc` from
 * `linux/Documentation/networking/tuntap.rst`.
 *
 * Opens /dev/net/tun, attaches it to an interface with TUNSETIFF and sets
 * the vnet header size to sizeof(struct virtio_net_hdr_v1).
 *
 * dev:       requested interface name, or NULL to leave the name field
 *            empty. When non-NULL it must point to a buffer of at least
 *            IFNAMSIZ bytes; the name reported back in the ifreq is
 *            written into it.
 * openFlags: flags for open(2) on /dev/net/tun.
 * ifrFlags:  IFF_TUN   - TUN device (no Ethernet headers)
 *            IFF_TAP   - TAP device
 *
 *            IFF_NO_PI - Do not provide packet information
 * out_fd:    receives the open descriptor on success.
 *
 * returns: 0 on success; EINVAL (with errno set) if the requested name does
 *          not fit; otherwise the failing TUNSETIFF ioctl's return value.
 *          The process exits if the device cannot be opened or if
 *          TUNSETVNETHDRSZ fails.
 */
int tuntap_alloc(char *dev, short openFlags, short ifrFlags, int *out_fd) {
    struct ifreq ifr = { 0 };
    int fd = -1, err = 0;

    DO_OR_DIE(fd = open("/dev/net/tun", openFlags));

    if (dev != NULL) {
        const size_t devLen = strlen(dev);
        if (devLen >= IFNAMSIZ) {
            /* If client requests a name, we do want the entire name to fit */
            /* Release the device first; set errno afterwards so that
             * close() cannot clobber it. */
            close(fd);
            errno = EINVAL;
            return EINVAL;
        }
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }
    ifr.ifr_flags = ifrFlags;

    TRUE_OR_WARN((err = ioctl(fd, TUNSETIFF, (void *)&ifr)) == 0);
    if (err != 0) {
        close(fd);
        return err;
    }

    /* Report the resulting name back, but only if the caller gave us a
     * buffer to write to. */
    if (dev != NULL) {
        strncpy(dev, ifr.ifr_name, IFNAMSIZ);
    }
    *out_fd = fd;

    {
        int sz = sizeof(struct virtio_net_hdr_v1);
        DO_OR_DIE(ioctl(fd, TUNSETVNETHDRSZ, &sz));
    }
    return 0;
}
/*
 * Serve tap requests on a SOCK_SEQPACKET unix socket bound at
 * `requestsPath`.
 *
 * For every connection one `struct tap_request` is read. If the requested
 * interface name matches one of `patterns` and only flags from
 * IFR_FLAGS_ALLOWED are requested, a tun/tap device is allocated and its
 * descriptor is sent back together with a `struct tap_reply`; otherwise a
 * reply with status AUTH_ERROR (and no descriptor) is sent.
 *
 * Failures while setting up the listening socket terminate the process.
 * Failures while serving a single client are reported and that client is
 * dropped, so that one bad request cannot take the server down.
 *
 * The loop never terminates on its own; the trailing return only exists
 * to satisfy the declared return type.
 */
int acceptRequests(const char *requestsPath, const struct allow_patterns *patterns) {
    int listener;
    /* Zeroed so that sun_path stays NUL-terminated after strncpy(). */
    struct sockaddr_un addr = { 0 };
    const int t = 1;

    DO_OR_DIE(listener = socket(AF_UNIX, SOCK_SEQPACKET, 0));
    DO_OR_DIE(setsockopt(listener, SOL_SOCKET, SO_PASSCRED, &t, sizeof(t)));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, requestsPath, SUN_PATH_SZ - 1);
    DO_OR_DIE(bind(listener, (const struct sockaddr *)&addr, sizeof(addr)));
    /* Remember the socket path so that the signal handler can unlink it. */
    PTR_OR_DIE(TEMP_PATHS[++LAST_TEMP_PATH] = strdup(requestsPath));
    DO_OR_DIE(listen(listener, N_CONNS));

    for (;;) {
        /* Already changed my mind about looking at ucred, but keeping the code around for now */
        int sock = -1;
        struct ucred cred = { 0 };
        struct msghdr msg = { 0 };
        struct cmsghdr *cmsg = NULL;
        struct iovec iov = { 0 };
        struct tap_request req = { 0 };

        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        iov.iov_base = &req;
        iov.iov_len = sizeof(struct tap_request);

        DO_OR_DIE((sock = accept(listener, NULL, NULL)));

        const ssize_t received = recvmsg(sock, &msg, 0);
        TRUE_OR_WARN(received > 0);
        if (received <= 0) {
            /* Client went away or sent nothing: drop it, keep serving. */
            close(sock);
            continue;
        }
        /* The name comes from the client: force termination inside the array. */
        req.name[IFNAMSIZ - 1] = 0;

        /* No control buffer is attached to `msg`, so this loop currently
         * finds nothing; `cred` is not consulted below. */
        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            if (cmsg->cmsg_level != SOL_SOCKET) { continue; }
            if (cmsg->cmsg_type != SCM_CREDENTIALS) { continue; }
            if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ucred))) { continue; }
            memcpy(&cred, CMSG_DATA(cmsg), sizeof(struct ucred));
            break;
        }
        (void) cred;

        if (req.ifrFlags == 0) {
            req.ifrFlags = IFR_FLAGS_DEFAULT;
        }

        const bool flagsOk = (req.ifrFlags & IFR_FLAGS_ALLOWED) == req.ifrFlags;
        bool allowed = false;
        for (size_t i = 0; flagsOk && !allowed && i < patterns->n; ++i) {
            allowed = fnmatch(patterns->patterns[i].name, req.name, 0) == 0;
        }

        struct tap_reply reply = { 0 };
        if (!allowed) {
            /* Tell the client why it gets no descriptor. MSG_NOSIGNAL
             * keeps a vanished client from raising SIGPIPE. */
            reply.status = AUTH_ERROR;
            TRUE_OR_WARN(send(sock, &reply, sizeof(reply), MSG_NOSIGNAL) > 0);
        } else {
            /* O_CLOEXEC? */
            int fd = -1;
            const bool allocated =
                tuntap_alloc(req.name, O_RDWR | O_NONBLOCK, req.ifrFlags, &fd) == 0;
            TRUE_OR_WARN(allocated);
            if (allocated) {
                /* tuntap_alloc() wrote the resulting name into req.name. */
                memcpy(reply.name, req.name, IFNAMSIZ);
                reply.name[IFNAMSIZ - 1] = 0;

                struct iovec replyIov = { 0 };
                replyIov.iov_base = &reply;
                replyIov.iov_len = sizeof(struct tap_reply);
                TRUE_OR_WARN(send_fd(sock, fd, &replyIov) > 0);
                close(fd);
            }
        }
        close(sock);
    }

    close(listener);
    return 0;
}
/*
 * Split `raw` on whitespace into an array of allow-list patterns.
 *
 * raw: NUL-terminated string of whitespace-separated fnmatch() patterns.
 *
 * returns: an allow_patterns whose `n` is the number of tokens found and
 *          whose `patterns` array holds a heap copy of each token. The
 *          array pointer is never NULL, even when `n` is 0.
 *
 * The process exits if an allocation fails.
 */
struct allow_patterns parsePatterns(const char *raw) {
    const size_t rawLen = strlen(raw);
    size_t nPatterns = 0;

    /* First pass: count the tokens. isspace() needs an unsigned char
     * value, a plain (possibly negative) char is undefined behaviour. */
    for (size_t i = 0; i < rawLen; ) {
        if (isspace((unsigned char) raw[i])) { ++i; continue; }
        while (i < rawLen && !isspace((unsigned char) raw[i])) { ++i; }
        ++nPatterns;
    }

    /* calloc(0, ...) is allowed to return NULL, which would be mistaken
     * for an allocation failure; always request at least one slot. */
    const size_t nSlots = nPatterns > 0 ? nPatterns : 1;
    struct allow_pattern *patterns = NULL;
    PTR_OR_DIE(patterns = calloc(nSlots, sizeof(struct allow_pattern)));

    /* Second pass: copy each token. */
    size_t iPattern = 0;
    for (size_t i = 0; i < rawLen; ) {
        if (isspace((unsigned char) raw[i])) { ++i; continue; }
        /* used to have per-group/per-user patterns, "u:$username:$pattern", &c - gone */
        const size_t start = i;
        while (i < rawLen && !isspace((unsigned char) raw[i])) { ++i; }
        PTR_OR_DIE(patterns[iPattern].name = strndup(&raw[start], i - start));
        iPattern += 1;
    }

    struct allow_patterns out = {
        .n = nPatterns,
        .patterns = patterns
    };
    return out;
}
/*
 * Ask the server listening at `servePath` for a tun/tap device.
 *
 * servePath: path of the server's unix socket;
 * ifname:    requested interface name (truncated to IFNAMSIZ - 1 bytes);
 * ifrFlags:  requested ifr_flags, 0 to let the server pick its defaults.
 *
 * returns: the descriptor received from the server. The process exits if
 *          connecting, sending the request or receiving the descriptor
 *          fails.
 */
int get(const char *servePath, const char *ifname, short ifrFlags) {
    /* TODO: sock: move out */
    int sock;
    /* Zeroed so that sun_path stays NUL-terminated after strncpy(). */
    struct sockaddr_un addr = { 0 };

    DO_OR_DIE(sock = socket(AF_UNIX, SOCK_SEQPACKET, 0));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, servePath, SUN_PATH_SZ - 1);
    DO_OR_DIE(connect(sock, (const struct sockaddr *)&addr, sizeof(addr)));

    struct msghdr msg = { 0 };
    struct iovec iov = { 0 };
    struct tap_request req = { 0 };

    strncpy(req.name, ifname, IFNAMSIZ - 1);
    req.ifrFlags = ifrFlags;

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    iov.iov_base = &req;
    iov.iov_len = sizeof(struct tap_request);
    TRUE_OR_DIE(sendmsg(sock, &msg, 0) > 0);

    int tunFd = -1;
    DO_OR_DIE(tunFd = recv_fd(sock, 0));
    close(sock);
    return tunFd;
}
/*
 * Signal handler: unlink every path recorded in TEMP_PATHS, then leave the
 * process. SIGINT ends with EXIT_SUCCESS, any other signal with
 * EXIT_FAILURE and a message naming the signal. A failed unlink (other
 * than ENOENT) aborts immediately through TRUE_OR_DIE.
 */
void cleanup(int signo, siginfo_t *info, void *_context) {
    int i = 0;

    while (i <= LAST_TEMP_PATH) {
        TRUE_OR_DIE(unlink(TEMP_PATHS[i]) != -1 || errno == ENOENT);
        i++;
    }

    if (signo != SIGINT) {
        errx(EXIT_FAILURE, "Exiting with signal %d", signo);
    }
    exit(EXIT_SUCCESS);
}
/* skarlibs under ISC */
/*
 * Clear the close-on-exec flag of `fd`.
 * Returns the negative fcntl() result if the flags cannot be read, 0 if
 * the flag was not set, otherwise the result of the F_SETFD call.
 */
int uncoe (int fd)
{
    const int current = fcntl(fd, F_GETFD, 0);

    if (current < 0) {
        return current;
    }
    if ((current & FD_CLOEXEC) == 0) {
        return 0;
    }
    return fcntl(fd, F_SETFD, current & ~FD_CLOEXEC);
}
/*
 * Entry point.
 *
 *   taps serve
 *       Serve tap requests on $TAPS_SOCK (default /run/taps/taps.sock),
 *       allowing interface names that match the whitespace-separated
 *       patterns in $TAPS_ALLOW (default "*").
 *
 *   taps pass [--ifname=NAME] [--] COMMAND [ARG...]
 *       Obtain a tap descriptor from the server, place it on fd 3 with
 *       close-on-exec cleared, and exec COMMAND.
 *
 * SIGINT and SIGSEGV are routed to cleanup().
 */
int main(int argc, char **argv) {
    struct sigaction act = { 0 };
    act.sa_flags = SA_SIGINFO;
    act.sa_sigaction = cleanup;
    DO_OR_DIE(sigaction(SIGINT, &act, NULL));
    DO_OR_DIE(sigaction(SIGSEGV, &act, NULL));

    bool cmdServe = false;
    bool cmdPass = false;
    const char *ifname = "vt-%d";

    char **rest = argv + 1;
    char **end = argv + argc;

    TRUE_OR_DIE(argc > 1);

    if (strcmp(rest[0], "serve") == 0) {
        cmdServe = true;
        ++rest;
    } else if (strcmp(rest[0], "pass") == 0) {
        static const char ifnameOpt[] = "--ifname=";
        /* Length without the terminating NUL, so that both the prefix
         * comparison and the value offset line up with the '='. */
        const size_t ifnameOptLen = sizeof(ifnameOpt) - 1;

        cmdPass = true;
        ++rest;
        for (; rest != end && rest[0][0] == '-'; ++rest) {
            if (strcmp(rest[0], "--") == 0) {
                /* End of options: the command starts after the marker. */
                ++rest;
                break;
            }
            if (strncmp(rest[0], ifnameOpt, ifnameOptLen) == 0) {
                ifname = rest[0] + ifnameOptLen;
            } else {
                error(EINVAL, EINVAL, "unknown option \"%s\"", rest[0]);
            }
        }
    } else {
        error(EINVAL, EINVAL, "no subcommand \"%s\"", rest[0]);
    }

    int nextArgc = argc - (rest - argv);
    char * const* nextArgv = rest;

    const char *patternsRaw = secure_getenv("TAPS_ALLOW");
    if (patternsRaw == NULL) {
        patternsRaw = "*";
    }

    struct allow_patterns patterns = { 0 };
    if (cmdServe) {
        PTR_OR_DIE((patterns = parsePatterns(patternsRaw)).patterns);
    }

    const char *servePath = secure_getenv("TAPS_SOCK");
    if (servePath == NULL) {
        servePath = "/run/taps/taps.sock";
    }

    if (cmdServe) {
        acceptRequests(servePath, &patterns);
    } else if (cmdPass) {
        TRUE_OR_DIE(nextArgc > 0);

        int fd = -1;
        DO_OR_DIE(fd = get(servePath, ifname, 0));

        /* The child expects the tap on descriptor 3. */
        if (fd != 3) {
            DO_OR_DIE(dup2(fd, 3));
            close(fd);
            fd = 3;
        }
        /* The descriptor must survive the exec below. */
        DO_OR_DIE(uncoe(fd));
        DO_OR_DIE(execvp(nextArgv[0], nextArgv));
    } else {
        error(EINVAL, EINVAL, "subcommand args");
    }

    return 0;
}

View file

@ -1,4 +0,0 @@
# Build definition for the `taps` executable (C only).
project('taps', 'c')
# External library looked up by name; the ch-proxy build generates a
# pkg-config file for its `sendfd` library.
sendfd = dependency('sendfd')
# Single-source executable linked against sendfd.
executable('taps', 'main.c', dependencies: [sendfd], install: true)

View file

@ -1,44 +0,0 @@
# Nix derivation for `taps`: a meson/ninja build of main.c.
# ch-proxy is a build input; meson.build looks up a `sendfd` dependency,
# which the ch-proxy build exports through pkg-config.
{
lib,
stdenv,
meson,
pkg-config,
rustc,
ninja,
ch-proxy,
}:
stdenv.mkDerivation {
pname = "taps";
version = "0.0.0";
# Only the files the build needs, so unrelated changes in this directory
# do not alter the source path.
src =
let
fs = lib.fileset;
in
fs.toSource {
root = ./.;
fileset = fs.unions [
./meson.build
./main.c
];
};
# NOTE(review): rustc looks unused by the C-only meson project; presumably
# left over from the commented-out Rust variant below - confirm.
nativeBuildInputs = [
ninja
meson
pkg-config
rustc
];
buildInputs = [ ch-proxy ];
}
# Commented-out sketch of a Rust-based packaging of the same tool.
# { lib, rustPlatform }:
#
# rustPlatform.buildRustPackage {
# pname = "taps";
# version = "0.0.0";
# src = let fs = lib.filesystem; in fs.toSource {
# root = ./.;
# fileset = fs.unions [
# ];
# };
# };

View file

@ -10,112 +10,21 @@
let
cfg = config.uvms.cloud-hypervisor;
inherit (config.networking) hostName;
inherit (config.debug.closure.erofs) layers;
inherit (lib)
mkOption
types
concatMapStringsSep
getExe
getExe'
getBin
;
package = pkgs.cloud-hypervisor.overrideAttrs (oldAttrs: {
patches = oldAttrs.patches or [ ] ++ [
# ../patches/ch.patch
];
buildType = "debug";
dontStrip = true;
});
uvmsPkgs = pkgs.callPackage ../pkgs { };
chSettingsFile = (pkgs.formats.json { }).generate "vm.json" cfg.settings;
uvmPrefix = "\${HOME}/uvms/${hostName}";
vmmSock = "${uvmPrefix}/vmm.sock";
elbPrefix = "${lib.getBin pkgs.execline}/bin";
s6Prefix = "${lib.getBin pkgs.s6}/bin";
writeElb = name: text: writeElb' name "-W" text;
writeElb' =
name: elArgs: text:
pkgs.writeTextFile {
inherit name;
destination = "/bin/${name}";
executable = true;
text = ''
#!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " "}${elArgs}
importas OLDPATH PATH
export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}"
${text}
'';
};
in
{
options = {
uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)";
uvms.cloud-hypervisor.runner = mkOption {
type = types.package;
uvms.cloud-hypervisor.runner = lib.mkOption {
type = lib.types.package;
description = "A naive script for running this system in cloud-hypervisor";
};
uvms.cloud-hypervisor.debugger = mkOption {
type = types.lazyAttrsOf types.anything;
description = "Same but you can debug the kernel";
uvms.cloud-hypervisor.extraArgv = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
};
uvms.cloud-hypervisor.settingsFile = mkOption {
type = types.package;
default = chSettingsFile;
defaultText = "...";
readOnly = true;
};
uvms.cloud-hypervisor.settings = mkOption {
default = { };
type = types.submodule {
freeformType = (pkgs.formats.json { }).type;
options = {
payload = {
cmdline = mkOption { type = types.str; };
kernel = mkOption { type = types.str; };
initramfs = mkOption {
type = types.str;
default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}";
};
};
vsock = {
cid = mkOption {
type = types.int;
default = 4;
};
socket = mkOption {
type = types.str;
default = "vsock.sock";
};
};
"api-socket" = mkOption {
type = types.str;
default = "vmm.sock";
};
"serial".mode = mkOption {
type = types.str;
default = "File";
};
"serial".file = mkOption {
type = types.nullOr types.str;
default = "serial";
};
"console".mode = mkOption {
type = types.str;
default = "Pty";
};
"console".file = mkOption {
type = types.nullOr types.str;
default = null;
};
# "watchdog" = true;
# "seccomp" = true;
};
};
uvms.cloud-hypervisor.argv = lib.mkOption {
type = lib.types.listOf lib.types.str;
};
uvms.cloud-hypervisor.extraCmdline = lib.mkOption {
type = lib.types.listOf lib.types.str;
@ -136,539 +45,34 @@ in
};
config = lib.mkMerge [
{
uvms.cloud-hypervisor.settings = {
payload = {
cmdline = lib.concatStringsSep " " cfg.cmdline;
kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}";
};
disks = map (img: {
path = img;
readonly = true;
id = toString img.label;
}) layers;
memory = {
size = 1536 * 1048576;
shared = true;
mergeable = true;
# hotplugged_size = 512 * 1048576;
# hotplugd_size = 1536 * 1048576;
# hotplug_method = "virtio-mem"
};
cpus = {
boot_vcpus = 4;
max_vcpus = 4;
};
};
uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest (
{ config, ... }:
{
name = "test-run-${hostName}";
passthru = rec {
inherit (config.nodes.machine.system.build) gdbScript;
inherit (config.nodes.machine.boot.kernelPackages) kernel;
kernelSrc = pkgs.srcOnly kernel;
};
nodes.machine =
{ config, ... }:
let
kernel = config.boot.kernelPackages.kernel;
kernelSrc = pkgs.srcOnly kernel;
gdbScript = writeElb "attach-gdb" ''
if { rm -rf /tmp/gdb }
if { mkdir -p /tmp/gdb/kos }
cd /tmp/gdb
if {
elglob -0 files ${kernelSrc}/*
forx -E f { $files }
ln -s $f ./
}
if { mkdir -p build }
cd build
if {
forx -E pattern {
${kernel.modules}/lib/modules/*/kernel/drivers/net/tun*
${kernel.modules}/lib/modules/*/kernel/drivers/net/tap*
}
elglob -0 files $pattern
forx -E f { $files }
if { cp $f . }
backtick -E COMPRESSED { basename $f }
xz -d $COMPRESSED
}
elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb
if {
if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts }
mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py
}
${getExe pkgs.gdb}
-ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")"
-ex "target remote :1234"
-ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py"
-ex "lx-symbols"
${kernel.dev}/vmlinux
'';
in
{
boot.kernelPackages = pkgs.linuxPackagesFor (
(pkgs.linux.override (oldArgs: {
# extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [
# "scripts_gdb"
# ];
kernelPatches = oldArgs.kernelPatches or [ ] ++ [
{
name = "debug";
patch = null;
structuredExtraConfig = {
GDB_SCRIPTS = lib.kernel.yes;
DEBUG_INFO = lib.kernel.yes;
DEBUG_INFO_REDUCED = lib.kernel.no;
# FRAME_POINTER = lib.kernel.yes; # "unused option"???
KALLSYMS = lib.kernel.yes;
KGDB = lib.kernel.yes;
};
}
];
})).overrideAttrs
(oldAttrs: {
dontStrip = true;
postInstall = oldAttrs.postInstall or "" + ''
cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist"
'';
})
);
boot.kernelParams = [ "nokaslr" ];
networking.useNetworkd = true;
virtualisation.qemu.options = [ "-s" ];
environment.systemPackages = [
pkgs.gdb
package # CH
cfg.runner
uvmsPkgs.taps
];
system.build.gdbScript = gdbScript;
systemd.services.taps = {
wantedBy = [ "multi-user.target" ];
environment.TAPS_SOCK = "/run/taps/taps.sock";
serviceConfig = {
UMask = "0007";
ExecStart = "${getExe uvmsPkgs.taps} serve";
RuntimeDirectory = "taps";
DynamicUser = true;
AmbientCapabilities = [
"CAP_NET_BIND_SERVICE"
"CAP_NET_ADMIN"
];
NoNewPrivileges = true;
};
};
};
testScript = ''
machine.succeed("${getExe cfg.runner}")
'';
}
uvms.cloud-hypervisor.argv = lib.mkBefore (
[
(lib.getExe pkgs.cloud-hypervisor)
"--cmdline=${lib.concatStringsSep " " cfg.cmdline}"
"--kernel=${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}"
"--initramfs=${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}"
"--vsock=cid=4,socket=vsock.sock"
"--api-socket=vmm.sock"
"--serial=tty"
"--console=null"
"--watchdog"
"--seccomp=true"
]
++ cfg.extraArgv
);
# NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum
uvms.cloud-hypervisor.runner =
let
toolsClosure = pkgs.writeClosure [
(lib.getBin pkgs.execline)
(lib.getBin pkgs.s6)
(lib.getBin package)
(lib.getBin pkgs.virtiofsd)
(lib.getBin pkgs.bubblewrap)
uvmsPkgs.taps
];
superviseVm = getExe superviseVm';
superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } ''
import os
import subprocess
import socket
from argparse import ArgumentParser
from contextlib import contextmanager, closing, ExitStack
parser = ArgumentParser("supervise-vm")
parser.add_argument("--vm")
parser.add_argument("--prefix", default="$HOME/uvms/$VM")
parser.add_argument("--sock", default="$PREFIX/supervisor.sock")
parser.add_argument("--vm-config")
MSG_SIZE = 16
ELB_DIR = "${lib.getBin pkgs.execline}/bin" # noqa: E501
S6_DIR = "${lib.getBin pkgs.s6}/bin" # noqa: E501
CH_DIR = "${lib.getBin package}/bin" # noqa: E501
UTIL_LINUX_DIR = "${lib.getBin pkgs.util-linux}/bin" # noqa: E501
SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder" # noqa: E501
CH_PATH = CH_DIR + "/cloud-hypervisor"
CHR_PATH = CH_DIR + "/ch-remote"
TAPS_PATH = "${lib.getExe uvmsPkgs.taps}" # noqa: E501
VIRTIOFSD_PATH = "${lib.getExe pkgs.virtiofsd}" # noqa: E501
BWRAP_PATH = "${lib.getExe pkgs.bubblewrap}" # noqa: E501
with open("${toolsClosure}", mode="r") as f: # noqa: E501
CLOSURE = [
*(ln.rstrip() for ln in f.readlines()),
"${placeholder "out"}", # noqa: E501
]
PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR, UTIL_LINUX_DIR])
PASSTHRU_ENV = {
**{
k: v
for k, v in os.environ.items()
if k.startswith("RUST")
or k.startswith("WAYLAND")
or k in [
"TAPS_SOCK",
]
},
"HOME": os.environ.get("HOME", os.getcwd()),
"PATH": PASSTHRU_PATH,
}
def preprocess_args(args_mut):
    """Expand $HOME, $VM and $PREFIX in every string-valued argument.

    The namespace is modified in place and also returned. Placeholders are
    expanded in that order over all string attributes, so the default
    prefix (which mentions $HOME and $VM) is fully expanded before it is
    substituted for $PREFIX.

    $HOME comes from PASSTHRU_ENV; $VM and $PREFIX come from the `vm` and
    `prefix` attributes of `args_mut` itself, not from a module-level
    `args` object.
    """
    keys = [
        k
        for k, v
        in args_mut._get_kwargs()
        if isinstance(v, str)]

    def substitute(placeholder, get_value):
        # The value is looked up lazily, only when the placeholder occurs.
        for k in keys:
            v = getattr(args_mut, k)
            if placeholder in v:
                setattr(args_mut, k, v.replace(placeholder, get_value()))

    substitute("$HOME", lambda: PASSTHRU_ENV["HOME"])
    substitute("$VM", lambda: args_mut.vm)
    substitute("$PREFIX", lambda: args_mut.prefix)
    return args_mut
class Processes:
def __init__(self, prefix, vm, check=True, **defaults):
self.prefix = prefix
self.vm = vm
self.check = check
self.defaults = defaults
def make_env(self):
return {
**PASSTHRU_ENV,
"PATH": PASSTHRU_PATH,
"PREFIX": self.prefix,
"VM": self.vm,
}
def exec(self, *args, **kwargs):
kwargs["cwd"] = kwargs.get("cwd", self.prefix)
kwargs["check"] = kwargs.get("check", self.check)
kwargs["env"] = kwargs.get("env", self.make_env())
return subprocess.run(
[*args],
**self.defaults,
**kwargs)
def execline(self, *args, **kwargs):
    """Run an execline script through execlineb and return the result.

    The positional arguments are joined with newlines and passed to
    `execlineb -c`. Keyword arguments are forwarded to `self.exec`, which
    supplies the default `cwd`, `check` and `env` and adds the instance's
    default keyword arguments.
    """
    # `self.exec`, not the builtin `exec`: the builtin takes no keyword
    # arguments such as `executable`.
    return self.exec(
        "execlineb", "-c", "\n".join(args),
        executable=ELB_DIR + "/execlineb",
        **kwargs,
    )
def popen(self, *args, **kwargs):
kwargs["pass_fds"] = kwargs.get("pass_fds", ())
kwargs["env"] = kwargs.get("env", self.make_env())
kwargs["cwd"] = kwargs.get("cwd", self.prefix)
return subprocess.Popen(
args,
**kwargs,
)
@contextmanager
def bwrap(
self,
*bwrap_args,
die_with_parent=True,
# Based on the args from
# `host/rootfs/image/usr/bin/run-vmm`
unshare_all=True,
unshare_user=True,
unshare_ipc=None,
unshare_pid=None,
unshare_net=None,
unshare_uts=None,
unshare_cgroup_try=True,
bind=(),
dev_bind=("/dev/kvm", "/dev/vfio"),
dev="/dev",
proc="/proc",
ro_bind=(
"/etc",
"/sys",
"/proc/sys",
"/dev/null",
"/proc/kallsyms",
*CLOSURE),
ro_bind_extra=(),
remount_ro=("/proc/fs", "/proc/irq"),
tmpfs=("/dev/shm", "/tmp", "/var/tmp", "/proc/fs", "/proc/irq"),
tmpfs_extra=(),
pass_fds=(2,),
**popen_kwargs):
bwrap_args_sock, remote = socket.socketpair()
remote.set_inheritable(True)
bwrap_args_f = bwrap_args_sock.makefile("w")
with closing(bwrap_args_sock), closing(bwrap_args_f):
def print_arg(*args):
print(*args, file=bwrap_args_f, sep="\0", end="\0")
if unshare_all:
print_arg("--unshare-all")
if unshare_user:
print_arg("--unshare-user")
if unshare_ipc:
print_arg("--unshare-ipc")
if unshare_pid:
print_arg("--unshare-pid")
if unshare_net:
print_arg("--unshare-net")
if unshare_uts:
print_arg("--unshare-uts")
if unshare_cgroup_try:
print_arg("--unshare-cgroup-try")
if die_with_parent:
print_arg("--die-with-parent")
for p in bind:
p1, p2 = (p, p) if isinstance(p, str) else p
print_arg("--bind", p1, p2)
for p in (*ro_bind, *ro_bind_extra):
p1, p2 = (p, p) if isinstance(p, str) else p
print_arg("--ro-bind", p1, p2)
for p in dev_bind:
p1, p2 = (p, p) if isinstance(p, str) else p
print_arg("--dev-bind", p1, p2)
for p in (*tmpfs, *tmpfs_extra):
print_arg("--tmpfs", p)
# Hunch: order might matter...
for p in remount_ro:
print_arg("--remount-ro", p)
bwrap_args_f.flush()
with closing(remote):
proc = self.popen(
"bwrap", "--args", str(remote.fileno()), *bwrap_args,
**popen_kwargs,
executable=BWRAP_PATH,
pass_fds=(*pass_fds, remote.fileno()),
)
with proc as p:
try:
yield p
finally:
try:
p.poll()
except: # noqa: E722
pass
if p.returncode is None:
p.terminate()
p.wait()
@contextmanager
def run_ch(self):
args = [
SOCKETBINDER_PATH,
"-B",
self.prefix + "/vmm.sock",
CH_PATH,
"--api-socket",
"fd=0",
]
p = self.popen(
*args,
shell=False,
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
pass_fds=(2,))
try:
p.wait(0.125)
needs_cleanup = False
except subprocess.TimeoutExpired:
needs_cleanup = True
if not os.path.exists(self.prefix + "/vmm.sock"):
raise RuntimeError(f"{self.prefix}/vmm.sock should exist by now")
if p.returncode is not None:
raise RuntimeError("CH exited early")
try:
yield p
finally:
try:
p.poll()
except: # noqa: E722
pass
if p.returncode is None:
p.terminate() # CH handles SIG{INT,TERM}?
p.wait()
unlink_paths = [
self.prefix + "/vmm.sock",
self.prefix + "/vmm.sock.lock",
self.prefix + "/vsock.sock",
] if needs_cleanup else []
for p in unlink_paths:
if os.path.exists(p):
os.remove(p)
@contextmanager
def add_virtiofsd(
self,
root_dir,
tag,
ro=False,
subdirs=None,
extra_flags=("--posix-acl",)):
assert os.path.exists(root_dir)
sock_path = self.prefix + f"/virtiofsd-{tag}.sock"
# s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# NOTE: Nope. Virtiofsd actually expects a blocking socket
# s.setblocking(True)
def rm_sock():
if os.path.exists(sock_path):
os.remove(sock_path)
with ExitStack() as cleanup: # noqa: F841
# s.bind(sock_path.encode("utf8"))
# cleanup.enter_context(closing(s))
cleanup.enter_context(defer(rm_sock))
args = [
# If using bwrap():
# "--argv0", "virtiofsd",
# "--uid", "1000",
# "--gid", "1000",
# "--",
"unshare", "-rUm",
"unshare", "--map-user", "1000", "--map-group", "1000",
VIRTIOFSD_PATH,
"--shared-dir",
root_dir,
"--tag",
tag,
# "--fd",
# str(s.fileno()),
"--socket-path",
sock_path,
# If relying on bwrap():
# "--sandbox",
# "none",
]
if ro:
args.append("--readonly")
kwargs = {
# If bwrap():
# "bind": [],
# ("ro_bind_extra" if ro else "bind"):
# [*subdirs]
# if subdirs is not None
# else [root_dir],
# "pass_fds": (2, s.fileno()),
}
proc_ctx = self.popen(*args, **kwargs)
with proc_ctx as p:
try:
try:
p.wait(0.125)
except subprocess.TimeoutExpired:
pass
if p.returncode is not None:
raise RuntimeError("virtiofsd exited too early")
yield p, sock_path
finally:
if p.returncode is None:
p.kill()
p.wait()
if os.path.exists(sock_path):
os.remove(sock_path)
@contextmanager
def defer(f):
try:
yield
finally:
f()
if __name__ == "__main__":
args, args_next = parser.parse_known_args()
preprocess_args(args)
os.makedirs(args.prefix, exist_ok=True)
ps = Processes(
prefix=args.prefix,
vm=args.vm,
)
ch_remote = [
"ch-remote",
"--api-socket",
args.prefix + "/vmm.sock",
]
with ExitStack() as cleanup:
ch = cleanup.enter_context(ps.run_ch())
ps.exec(*ch_remote, "create", args.vm_config)
ps.exec(
TAPS_PATH, "pass",
*ch_remote, "add-net",
"id=wan,fd=3,mac=00:00:00:00:00:01")
send_dir = PASSTHRU_ENV["HOME"] + f"/send/{args.vm}"
os.makedirs(send_dir, exist_ok=True)
vfsd, vfsd_path = cleanup.enter_context(
ps.add_virtiofsd(
send_dir,
tag="send",
))
ps.exec(*ch_remote, "add-fs", f"tag=send,socket={vfsd_path},id=send")
ps.exec(*ch_remote, "boot")
ps.exec(*ch_remote, "info")
try:
ch.wait()
except KeyboardInterrupt:
pass
'';
in
writeElb "run-${hostName}" ''
${superviseVm} --vm-config=${chSettingsFile} --vm=${hostName}
'';
uvms.cloud-hypervisor.runner = pkgs.writeShellScriptBin "run-${config.networking.hostName}" ''
set -euo pipefail
GUESTNAME=${config.networking.hostName}
args=(
${lib.concatMapStringsSep "\n" lib.escapeShellArg cfg.argv}
)
mkdir -p "$HOME/uvms/$GUESTNAME"
cd "$HOME/uvms/$GUESTNAME"
cleanup() {
rm "$HOME/uvms/$GUESTNAME"/{vmm,vsock}.sock
}
exec -a "uuvm/$GUESTNAME" "''${args[@]}"
'';
}
(lib.mkIf cfg.enable {
boot.initrd.availableKernelModules = [
@ -699,6 +103,12 @@ in
}
) layers
);
uvms.cloud-hypervisor.argv = [
"--memory=size=1536M,hotplug_size=1536M,hotplugged_size=512M,hotplug_method=virtio-mem,mergeable=on,shared=on"
"--cpus=boot=4"
"--disk"
]
++ map (img: "path=${img},readonly=true,id=${toString img.label}") layers;
})
];
}

View file

@ -41,7 +41,6 @@ in
volumes = [
{
image = "swapfile.img";
serial = "swapfiles";
mountPoint = "/var/swapfiles";
size = 1024;
}

View file

@ -29,6 +29,7 @@ in
};
config = mergeIf cfg.enable [
{
services.getty.autologinUser = "user";
security.sudo.wheelNeedsPassword = false;
users.mutableUsers = false;
users.users.user = {

View file

@ -1,13 +0,0 @@
# Development shell built from the ambient <nixpkgs>.
with import <nixpkgs> { };
# stdenvNoCC: the shell itself does not pull in a C compiler.
mkShell.override { stdenv = stdenvNoCC; } {
# Only the `bin` outputs of the tools; the manual packages are added whole.
packages = map lib.getBin [
cloud-hypervisor
virtiofsd
crosvm # virtio-gpu
npins
] ++ [
man-pages
linux-manual
];
}