Execlineb is insane. Skarnet is insane. POSIX is cancer, but elb seems to make it worse. Or it's a skill issue. I see no way to do any kind of error handling with elb, even though I only need one kind of error handling: the cleaning up...
508 lines
17 KiB
Nix
508 lines
17 KiB
Nix
{
|
|
config,
|
|
lib,
|
|
pkgs,
|
|
...
|
|
}:
|
|
|
|
# It is not the intent to stick to the microvm.nix-like static interface,
|
|
# but we shall begin by reproducing at least some of their work.
|
|
|
|
let
|
|
cfg = config.uvms.cloud-hypervisor;
|
|
|
|
inherit (config.networking) hostName;
|
|
inherit (config.debug.closure.erofs) layers;
|
|
inherit (lib)
|
|
mkOption
|
|
types
|
|
concatMapStringsSep
|
|
getExe
|
|
getExe'
|
|
getBin
|
|
;
|
|
|
|
# cloud-hypervisor rebuilt as an unstripped debug build.  The extra patch
# list is currently empty (its only entry is commented out).
package = pkgs.cloud-hypervisor.overrideAttrs (
  previous:
  let
    extraPatches = [
      # ../patches/ch.patch
    ];
  in
  {
    patches = (previous.patches or [ ]) ++ extraPatches;
    buildType = "debug";
    dontStrip = true;
  }
);
|
|
# Helper packages from ../pkgs.
uvmsPkgs = pkgs.callPackage ../pkgs { };

# cfg.settings rendered as a JSON file named vm.json.
chSettingsFile =
  let
    json = pkgs.formats.json { };
  in
  json.generate "vm.json" cfg.settings;

# The `\$` keeps Nix from interpolating: the generated scripts contain the
# literal text ${HOME}, which they resolve themselves at run time.
uvmPrefix = "\${HOME}/uvms/${hostName}";
vmmSock = "${uvmPrefix}/vmm.sock";

# bin/ directories that writeElb' puts in front of PATH.
elbPrefix = "${getBin pkgs.execline}/bin";
s6Prefix = "${getBin pkgs.s6}/bin";
|
|
# writeElb name text
#
# Shorthand for writeElb' with "-W" as the execlineb argument.
writeElb = name: text: writeElb' name "-W" text;

# writeElb' name elArgs text
#
# Builds a package whose only file is the executable execlineb script
# /bin/<name>.
#
#   name    file name of the script, also used as the derivation name
#   elArgs  string appended to the shebang after a single space, or null
#           for a bare `#!…/execlineb` line
#   text    script body; it runs after PATH has been extended with the
#           execline and s6 bin directories
writeElb' =
  name: elArgs: text:
  pkgs.writeTextFile {
    inherit name;
    destination = "/bin/${name}";
    executable = true;
    # Every caller feeds the result to getExe; naming the main program
    # spares it the fallback guess based on the derivation name.
    meta.mainProgram = name;
    # The separator and the argument sit inside the same optionalString:
    # interpolating a null elArgs on its own would abort evaluation.
    text = ''
      #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " ${elArgs}"}
      importas OLDPATH PATH
      export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}"
      ${text}
    '';
  };
|
|
in
|
|
{
|
|
options = {
|
|
# Switch for the guest-side configuration (initrd modules, fileSystems).
uvms.cloud-hypervisor.enable = lib.mkEnableOption "Configure guest (e.g. fileSystems)";

uvms.cloud-hypervisor.runner = mkOption {
  description = "A naive script for running this system in cloud-hypervisor";
  type = types.package;
};

uvms.cloud-hypervisor.debugger = mkOption {
  description = "Same but you can debug the kernel";
  type = with types; lazyAttrsOf anything;
};

# Read-only view of `settings` as a generated JSON file.
uvms.cloud-hypervisor.settingsFile = mkOption {
  readOnly = true;
  type = types.package;
  default = chSettingsFile;
  defaultText = "...";
};
|
|
# Contents of the VM configuration file.  Anything JSON-representable is
# accepted (freeform); the options below are the typed, defaulted subset.
uvms.cloud-hypervisor.settings = mkOption {
  default = { };
  type = types.submodule {
    freeformType = (pkgs.formats.json { }).type;
    options =
      let
        str = types.str;
        optStr = types.nullOr types.str;
        # An option of the given type with the given default.
        withDefault = type: default: mkOption { inherit type default; };
      in
      {
        payload = {
          # No defaults here: both are set in the `config` section.
          cmdline = mkOption { type = str; };
          kernel = mkOption { type = str; };
          initramfs = withDefault str "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}";
        };
        vsock = {
          cid = withDefault types.int 4;
          socket = withDefault str "vsock.sock";
        };
        "api-socket" = withDefault str "vmm.sock";
        serial = {
          mode = withDefault str "File";
          file = withDefault optStr "serial";
        };
        console = {
          mode = withDefault str "Pty";
          file = withDefault optStr null;
        };
        # "watchdog" = true;
        # "seccomp" = true;
      };
  };
};
|
|
# Extra kernel parameters, appended at the very end of `cmdline`.
uvms.cloud-hypervisor.extraCmdline = mkOption {
  type = types.listOf types.str;
  default = [ ];
};

# Full kernel command line as a list: fixed base parameters, then
# boot.kernelParams, then extraCmdline.
uvms.cloud-hypervisor.cmdline = mkOption {
  type = types.listOf types.str;
  default =
    let
      base = [
        "earlyprintk=ttyS0"
        "console=ttyS0"
        "reboot=t"
        "panic=-1"
        "init=${config.system.build.toplevel}/init"
      ];
    in
    base ++ config.boot.kernelParams ++ cfg.extraCmdline;
};
|
|
};
|
|
config = lib.mkMerge [
|
|
{
|
|
# Values written into the VM configuration: kernel and command line, one
# read-only disk per erofs layer, memory and CPU sizing.
uvms.cloud-hypervisor.settings =
  let
    mib = 1048576;
    # One read-only disk entry per layer image, identified by its label.
    toDisk = img: {
      path = img;
      readonly = true;
      id = toString img.label;
    };
  in
  {
    payload.cmdline = lib.concatStringsSep " " cfg.cmdline;
    payload.kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}";

    disks = map toDisk layers;

    memory = {
      size = 1536 * mib;
      shared = true;
      mergeable = true;
      # hotplugged_size = 512 * 1048576;
      # hotplugd_size = 1536 * 1048576;
      # hotplug_method = "virtio-mem"
    };

    cpus.boot_vcpus = 4;
    cpus.max_vcpus = 4;
  };
|
|
|
|
uvms.cloud-hypervisor.debugger = pkgs.testers.runNixOSTest (
|
|
{ config, ... }:
|
|
{
|
|
name = "test-run-${hostName}";

# Exposes the test machine's gdb helper script, its kernel and the
# kernel sources on the test derivation.
passthru =
  let
    inherit (config.nodes.machine.boot.kernelPackages) kernel;
  in
  {
    inherit (config.nodes.machine.system.build) gdbScript;
    inherit kernel;
    kernelSrc = pkgs.srcOnly kernel;
  };
|
|
nodes.machine =
|
|
{ config, ... }:
|
|
let
|
|
inherit (config.boot.kernelPackages) kernel;
kernelSrc = pkgs.srcOnly kernel;

# attach-gdb: recreates /tmp/gdb, symlinks the kernel source entries into
# it, copies the tun*/tap* modules into /tmp/gdb/build and runs `xz -d`
# on them, copies the kernel's scripts/gdb directory, then starts gdb on
# vmlinux connected to the remote target at :1234.
gdbScript = writeElb "attach-gdb" ''
  if { rm -rf /tmp/gdb }
  if { mkdir -p /tmp/gdb/kos }
  cd /tmp/gdb
  if {
    elglob -0 files ${kernelSrc}/*
    forx -E f { $files }
    ln -s $f ./
  }
  if { mkdir -p build }
  cd build
  if {
    forx -E pattern {
      ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun*
      ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap*
    }
    elglob -0 files $pattern
    forx -E f { $files }
    if { cp $f . }
    backtick -E COMPRESSED { basename $f }
    xz -d $COMPRESSED
  }
  elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb
  if {
    if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts }
    mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py
  }
  ${getExe pkgs.gdb}
    -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")"
    -ex "target remote :1234"
    -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py"
    -ex "lx-symbols"
    ${kernel.dev}/vmlinux
'';
|
|
in
|
|
{
|
|
# pkgs.linux with debug-oriented config options switched on, left
# unstripped, and with scripts/gdb/linux/constants.py copied from the
# build tree into the dev output when that file exists.
boot.kernelPackages =
  let
    # Config-only kernel "patch" (patch = null).
    debugPatch = {
      name = "debug";
      patch = null;
      structuredExtraConfig = with lib.kernel; {
        GDB_SCRIPTS = yes;
        DEBUG_INFO = yes;
        DEBUG_INFO_REDUCED = no;
        # FRAME_POINTER = lib.kernel.yes; # "unused option"???
        KALLSYMS = yes;
        KGDB = yes;
      };
    };

    debugKernel = pkgs.linux.override (oldArgs: {
      # extraMakeFlags = oldArgs.extraMakeFlags or [ ] ++ [
      #   "scripts_gdb"
      # ];
      kernelPatches = (oldArgs.kernelPatches or [ ]) ++ [ debugPatch ];
    });
  in
  pkgs.linuxPackagesFor (
    debugKernel.overrideAttrs (oldAttrs: {
      dontStrip = true;
      postInstall = (oldAttrs.postInstall or "") + ''
        cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist"
      '';
    })
  );
|
|
boot.kernelParams = [ "nokaslr" ];
networking.useNetworkd = true;

# NOTE(review): presumably this opens the gdb endpoint that attach-gdb
# reaches through `target remote :1234` - confirm against the QEMU docs.
virtualisation.qemu.options = [ "-s" ];

# Tools available inside the test machine: gdb, the debug build of
# cloud-hypervisor, the runner script and the taps helper.
environment.systemPackages = [
  pkgs.gdb
  package # CH
  cfg.runner
  uvmsPkgs.taps
];

system.build.gdbScript = gdbScript;

# Runs `taps serve` as a dynamic user with the listed network
# capabilities, TAPS_SOCK pointing into its runtime directory.
systemd.services.taps = {
  wantedBy = [ "multi-user.target" ];
  environment = {
    TAPS_SOCK = "/run/taps/taps.sock";
  };
  serviceConfig = {
    ExecStart = "${getExe uvmsPkgs.taps} serve";
    RuntimeDirectory = "taps";
    UMask = "0007";
    DynamicUser = true;
    NoNewPrivileges = true;
    AmbientCapabilities = [
      "CAP_NET_BIND_SERVICE"
      "CAP_NET_ADMIN"
    ];
  };
};
|
|
};
|
|
# The test consists of a single step: the runner script must exit
# successfully inside the test machine.
testScript = ''
  machine.succeed("${getExe cfg.runner}")
'';
|
|
}
|
|
);
|
|
|
|
# NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum
|
|
uvms.cloud-hypervisor.runner =
|
|
let
|
|
addProcess = getExe addProcess';
|
|
addProcess' = pkgs.writers.writePython3Bin "add-process" { } ''
|
|
import os
|
|
import select
|
|
import socket
|
|
import subprocess
|
|
import sys
|
|
from argparse import ArgumentParser
|
|
from contextlib import contextmanager, ExitStack
|
|
from threading import Thread, Semaphore
|
|
|
|
|
|
# Command line: the path of the shared event socket plus any number of
# --then commands.  Whatever the parser does not recognise is treated
# by the main section as the command to run.
parser = ArgumentParser()
parser.add_argument("events_path")
parser.add_argument("--then", action="append")
|
|
|
|
# Every event travels as one frame of exactly MSG_SIZE bytes.
MSG_SIZE = 16
# Process-wide scratch state; serve_impl() sets SHMEM["server"].
SHMEM = {}


def send(sock, msg):
    """Send one event over sock, space-padded to exactly MSG_SIZE bytes.

    msg must not be longer than MSG_SIZE.  Returns the number of bytes
    written, which is always MSG_SIZE.
    """
    assert len(msg) <= MSG_SIZE, len(msg)
    frame = msg.ljust(MSG_SIZE)
    # sendall() keeps writing until the whole frame is out; a short
    # write from plain send() would break the fixed-size framing.
    sock.sendall(frame)
    return len(frame)


def recv(sock):
    """Read up to one frame from sock and return its first word.

    Returns b"" when the data held nothing but padding or when the
    peer has closed the connection.
    """
    msg = sock.recv(MSG_SIZE)
    assert len(msg) <= MSG_SIZE, len(msg)
    return (msg.split() + [b""])[0]
|
|
|
|
|
|
def serve_impl(events_path, listener):
    """Run the event server on an already listening socket.

    While "up", accept clients and read their events.  The first
    b"killall" event switches the server to "down": every connection
    that becomes writable is then sent b"die" and closed, and the
    function returns once no connection is left.

    events_path is not used here; the parameter is kept for callers.
    """
    SHMEM["server"] = True

    cons = []
    state = "up"
    while state == "up" or cons:
        if state == "up":
            rs, ws, _ = select.select([listener, *cons], [], [])
        else:
            rs, ws, _ = select.select(cons, cons, [])
        for r in rs:
            if r is listener:
                con, _ = r.accept()
                cons.append(con)
                continue
            try:
                event = recv(r)
            except OSError:
                event = b""
            if event == b"killall":
                state = "down"
            elif not event:
                # End of stream or a broken connection: forget the
                # peer, otherwise select() reports it readable forever.
                r.close()
                cons.remove(r)
        if state == "down":
            for w in ws:
                if w not in cons:
                    # Dropped while handling the reads above.
                    continue
                try:
                    send(w, b"die")
                except OSError:
                    # The peer is gone already; nothing to notify.
                    pass
                w.close()
                cons.remove(w)
|
|
|
|
|
|
def serve(events_path):
    """Try to become the event server for events_path.

    Binds a UNIX stream socket at events_path (creating the parent
    directory if needed) and runs serve_impl() on it.  When the address
    is already in use the function returns None without serving; any
    other OSError propagates.
    """
    import errno

    base_dir = os.path.dirname(events_path)
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)
    listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
    listener.setblocking(False)

    bound = False
    try:
        listener.bind(events_path)
        bound = True
        listener.listen()
        return serve_impl(events_path, listener)
    except OSError as e:
        if e.errno != errno.EADDRINUSE:
            raise
    finally:
        listener.close()
        if bound:
            # Only the process that created the socket file deletes it;
            # after a failed bind the path belongs to someone else.
            os.remove(events_path)
|
|
|
|
|
|
def register(events_path):
    """Connect a new UNIX stream socket to events_path and return it."""
    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
    client.connect(events_path)
    return client
|
|
|
|
|
|
@contextmanager
def defer(f):
    """Context manager that calls f() when the with-block is left.

    f runs on normal exit and when the block raises.
    """
    with ExitStack() as stack:
        stack.callback(f)
        yield
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: add-process EVENTS_PATH [--then CMD]... [COMMAND ARG...]
    #
    # Starts COMMAND, tries to become the event server for EVENTS_PATH,
    # and registers with whichever server owns that path.  When COMMAND
    # exits, b"killall" is sent to the server; when the server answers
    # b"die", COMMAND is terminated.  The --then commands run on the
    # way out, unless COMMAND exited within the first half second.
    args, args_next = parser.parse_known_args()

    p = None
    # Created before the server thread starts, and regardless of whether
    # a command was given, so the name is bound for every thread.
    s_lock = Semaphore()

    with ExitStack() as cleanup:
        then_cmds = []
        if args_next:
            p = subprocess.Popen(
                args_next,
                shell=False)
            then_cmds = list(reversed(args.then or []))
            try:
                p.wait(0.5)
                # The command is already gone: skip its --then hooks.
                then_cmds = []
            except subprocess.TimeoutExpired:
                pass
        for cmd in then_cmds:
            # callback() stores this iteration's cmd.  A closure over
            # the loop variable would run the last command every time.
            cleanup.callback(subprocess.run, cmd)

        maybe_server = Thread(
            target=serve,
            args=(args.events_path,),
            daemon=True)
        maybe_server.start()
        maybe_server.join(0.5)

        # is_alive is a method; without the call the comparison was
        # against a bound method object, which is always truthy.
        assert (
            ("server" in SHMEM) == maybe_server.is_alive()
        ), (SHMEM, maybe_server)

        if args_next:
            s = register(args.events_path)

            def watch_p(p, s):
                """Wait for the command, then ask the server to stop all."""
                p.wait()
                with s_lock:
                    try:
                        send(s, b"killall")
                    except ConnectionError:
                        # The server is gone; nobody left to notify.
                        pass

            def watch_s(p, s):
                """Terminate the command once the server says so."""
                while True:
                    try:
                        event = recv(s)
                    except OSError:
                        event = b""
                    # b"" means the connection is closed: no further
                    # event can arrive, so stop exactly as on b"die".
                    if event in (b"die", b""):
                        p.terminate()
                        break

            s_watcher = Thread(
                target=watch_s,
                args=(p, s),
                daemon=True)
            s_watcher.start()
            watch_p(p, s)
            s_watcher.join()
            s.close()

        if SHMEM.get("server", False):
            maybe_server.join()

    exit_code = 0
    if p is not None:
        exit_code |= p.returncode
    sys.exit(exit_code)
|
|
'';
|
|
# Paths of the cloud-hypervisor binary and of its ch-remote client.
ch = getExe package;
chr = getExe' package "ch-remote";
in
# run-<hostName>: starts cloud-hypervisor in the background under
# add-process (with rm-vmmsock registered as its --then command), then
# drives it through ch-remote: create the VM from the settings file, add
# the network device via `taps pass`, boot, and print the VM info.
# Each ch-remote step sits in an `if` block, so a failing step ends the
# script.
writeElb "run-${hostName}" ''
  importas -i HOME HOME
  importas -SsD "${chr} --api-socket=${vmmSock}" CHR
  importas -SsD "${uvmPrefix}" PREFIX
  define EVENTS ''${PREFIX}/events.sock
  define -s ADD_PROC "${addProcess} ''${EVENTS}"

  cd $PREFIX
  background {
    $ADD_PROC --then ${getExe (
      # A single rm invocation: execline has no statement separator, so
      # two consecutive `rm -f X` lines were parsed as one command that
      # also received the word "rm" as a file to delete.
      writeElb "rm-vmmsock" ''
        importas -i HOME HOME
        rm -f ${vmmSock} ${uvmPrefix}/vsock.sock
      ''
    )} ${getExe (
      # $1 is the API socket path, $2 the name shown in the process title.
      writeElb "ch" ''
        importas -Si 1
        importas -Si 2
        s6-ipcserver-socketbinder -B $1
        exec -a "uuvm/''${2} cloud-hypervisor" ${ch} --api-socket fd=0
      ''
    )} ${vmmSock} ${hostName}
  }
  foreground { sleep 0.1 }
  ifelse -n { test -S ${vmmSock} } {
    foreground { echo "Apparently ${vmmSock} does not exist" }
    exit 1
  }
  foreground { echo "Loading the configuration" }
  if { $CHR create ${chSettingsFile} }
  foreground { echo "Adding TAP" }
  if { ${lib.getExe uvmsPkgs.taps} pass $CHR add-net "id=wan,fd=3,mac=00:00:00:00:00:01" }
  foreground { echo "Booting" }
  if { $CHR boot }
  if { $CHR info }
'';
|
|
}
|
|
# Guest-side configuration, active only when cfg.enable is set: initrd
# modules, one mount per layer image under /nix/.ro-stores/<seq>, and an
# overlay at /nix/store with all of those mounts as lower directories.
(lib.mkIf cfg.enable (
  let
    # Mount point of one layer image.
    roStore = img: "/nix/.ro-stores/${toString img.seq}";

    # fileSystems entry that mounts one layer image by its label.
    mountLayer =
      img:
      lib.nameValuePair (roStore img) {
        device = "/dev/disk/by-label/${img.label}";
        neededForBoot = true;
        options = [ "x-systemd.device-timeout=5" ];
      };
  in
  {
    boot.initrd.availableKernelModules = [
      "erofs"
      "overlay"
      "virtio_mmio"
      "virtio_pci"
      "virtio_blk"
      # "9pnet_virtio"
      # "9p"
      "virtiofs"
    ];
    boot.initrd.systemd.enable = lib.mkDefault true;

    fileSystems = {
      "/nix/store" = {
        fsType = "overlay";
        overlay.lowerdir = map roStore layers;
        neededForBoot = true;
      };
    }
    // lib.listToAttrs (map mountLayer layers);
  }
))
|
|
];
|
|
}
|