488 lines
16 KiB
Nix
488 lines
16 KiB
Nix
# NixOS module arguments: the evaluated configuration, the nixpkgs library
# and the package set.
{ config, lib, pkgs, ... }:
|
|
|
|
# The intent is not to stick to a microvm.nix-like static interface,
# but we begin by reproducing at least some of their work.
|
|
|
|
let
|
|
# This module's own option values.
cfg = config.uvms.cloud-hypervisor;

# Values read from elsewhere in the system configuration.
inherit (config.debug.closure.erofs) layers;
inherit (config.networking) hostName;

# Library helpers used unqualified below.
inherit (lib) concatMapStringsSep getBin getExe getExe' mkOption types;
|
|
|
|
# cloud-hypervisor built as an unstripped debug build, with room for local
# patches.
package =
  let
    extraPatches = [
      # ../patches/ch.patch
    ];
  in
  pkgs.cloud-hypervisor.overrideAttrs (prev: {
    patches = (prev.patches or [ ]) ++ extraPatches;
    buildType = "debug";
    dontStrip = true;
  });
|
|
# Project-local packages (provides `taps`, used below).
uvmsPkgs = pkgs.callPackage ../pkgs { };

# `cfg.settings` serialised as "vm.json".
chSettingsFile =
  let
    json = pkgs.formats.json { };
  in
  json.generate "vm.json" cfg.settings;

# Per-VM directory; `${HOME}` is left literal in the resulting string.
uvmPrefix = "\${HOME}/uvms/" + hostName;
vmmSock = uvmPrefix + "/vmm.sock";

# bin directories of execline and s6, prepended to PATH by writeElb'.
elbPrefix = "${getBin pkgs.execline}/bin";
s6Prefix = "${getBin pkgs.s6}/bin";
|
|
# Write an execline script named `name`, invoking execlineb with the `-W` flag.
writeElb = name: text: writeElb' name "-W" text;

# Write an executable execline script to $out/bin/<name>.
#
#   name:   script (and derivation) name
#   elArgs: extra argument string for the execlineb shebang, or null for none
#   text:   script body; it runs with the execline and s6 bin directories
#           prepended to the caller's PATH
writeElb' =
  name: elArgs: text:
  pkgs.writeTextFile {
    inherit name;
    destination = "/bin/${name}";
    executable = true;
    # Lets lib.getExe find the script without guessing from the derivation
    # name (which may be mis-parsed when `name` contains "-<digit>").
    meta.mainProgram = name;
    # The shebang only interpolates elArgs when it is non-null; interpolating
    # null directly would abort evaluation.
    text = ''
      #!${getExe' pkgs.execline "execlineb"}${lib.optionalString (elArgs != null) " ${elArgs}"}
      importas OLDPATH PATH
      export PATH "${elbPrefix}:${s6Prefix}:''${OLDPATH}"
      ${text}
    '';
  };
|
|
in
|
|
{
|
|
options = {
  # Gates the guest-side configuration (initrd modules, fileSystems) defined
  # under `config` below.
  uvms.cloud-hypervisor.enable = lib.mkEnableOption "guest-side configuration (e.g. fileSystems) for running under cloud-hypervisor";

  uvms.cloud-hypervisor.runner = mkOption {
    type = types.package;
    description = "A naive script for running this system in cloud-hypervisor";
  };

  uvms.cloud-hypervisor.debugger = mkOption {
    type = types.lazyAttrsOf types.anything;
    description = "Same but you can debug the kernel";
  };

  uvms.cloud-hypervisor.settingsFile = mkOption {
    type = types.package;
    default = chSettingsFile;
    defaultText = lib.literalMD "`vm.json` generated from {option}`uvms.cloud-hypervisor.settings`";
    description = "Read-only: the JSON file generated from {option}`uvms.cloud-hypervisor.settings`.";
    readOnly = true;
  };

  uvms.cloud-hypervisor.settings = mkOption {
    default = { };
    description = ''
      Contents of the `vm.json` VM configuration handed to cloud-hypervisor.
      Free-form: any JSON-representable attribute is accepted in addition to
      the options declared here.
    '';
    type = types.submodule {
      freeformType = (pkgs.formats.json { }).type;
      options = {
        payload = {
          cmdline = mkOption {
            type = types.str;
            description = "Kernel command line, as a single string.";
          };
          kernel = mkOption {
            type = types.str;
            description = "Path to the kernel image.";
          };
          initramfs = mkOption {
            type = types.str;
            default = "${config.system.build.initialRamdisk}/${config.system.boot.loader.initrdFile}";
            # The default depends on `config`, so give the docs a literal form.
            defaultText = lib.literalExpression "\"\${config.system.build.initialRamdisk}/\${config.system.boot.loader.initrdFile}\"";
            description = "Path to the initial ramdisk.";
          };
        };

        vsock = {
          cid = mkOption {
            type = types.int;
            default = 4;
          };
          socket = mkOption {
            type = types.str;
            default = "vsock.sock";
          };
        };

        "api-socket" = mkOption {
          type = types.str;
          default = "vmm.sock";
        };

        serial = {
          mode = mkOption {
            type = types.str;
            default = "File";
          };
          file = mkOption {
            type = types.nullOr types.str;
            default = "serial";
          };
        };

        console = {
          mode = mkOption {
            type = types.str;
            default = "Pty";
          };
          file = mkOption {
            type = types.nullOr types.str;
            default = null;
          };
        };

        # "watchdog" = true;
        # "seccomp" = true;
      };
    };
  };

  uvms.cloud-hypervisor.extraCmdline = mkOption {
    type = types.listOf types.str;
    default = [ ];
    description = "Extra kernel parameters appended to the default {option}`uvms.cloud-hypervisor.cmdline`.";
  };

  uvms.cloud-hypervisor.cmdline = mkOption {
    type = types.listOf types.str;
    default = [
      "earlyprintk=ttyS0"
      "console=ttyS0"
      "reboot=t"
      "panic=-1"
      "init=${config.system.build.toplevel}/init"
    ]
    ++ config.boot.kernelParams
    ++ config.uvms.cloud-hypervisor.extraCmdline;
    # The default depends on `config`, so give the docs a literal form.
    defaultText = lib.literalExpression ''
      [
        "earlyprintk=ttyS0"
        "console=ttyS0"
        "reboot=t"
        "panic=-1"
        "init=''${config.system.build.toplevel}/init"
      ]
      ++ config.boot.kernelParams
      ++ config.uvms.cloud-hypervisor.extraCmdline
    '';
    description = "Kernel parameters; joined with spaces to form `settings.payload.cmdline`.";
  };
};
|
|
config = lib.mkMerge [
|
|
{
|
|
# VM definition serialised into vm.json: kernel payload, one read-only disk
# per erofs layer, memory and vCPU counts.
uvms.cloud-hypervisor.settings =
  let
    mib = 1024 * 1024;
    vcpuCount = 4;
    # Each layer image becomes a read-only disk identified by its label.
    layerToDisk = img: {
      id = toString img.label;
      path = img;
      readonly = true;
    };
  in
  {
    payload.cmdline = builtins.concatStringsSep " " cfg.cmdline;
    payload.kernel = "${config.boot.kernelPackages.kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target}";

    disks = map layerToDisk layers;

    memory = {
      size = 1536 * mib;
      shared = true;
      mergeable = true;
      # hotplugged_size = 512 * 1048576;
      # hotplug_size = 1536 * 1048576;
      # hotplug_method = "virtio-mem";
    };

    cpus = {
      boot_vcpus = vcpuCount;
      max_vcpus = vcpuCount;
    };
  };
|
|
|
|
# NixOS test whose single node runs a debug-enabled kernel under QEMU (with
# the gdb stub enabled via `-s`) and launches the cloud-hypervisor runner
# inside it. The gdb helper script and the kernel are exposed via passthru.
uvms.cloud-hypervisor.debugger =
  let
    inherit (lib.kernel) yes no;

    # execline script: stages the kernel sources, the tun/tap modules and the
    # kernel gdb scripts under /tmp/gdb, then starts gdb against :1234.
    mkGdbScript =
      kernel:
      let
        kernelSources = pkgs.srcOnly kernel;
      in
      writeElb "attach-gdb" ''
        if { rm -rf /tmp/gdb }
        if { mkdir -p /tmp/gdb/kos }
        cd /tmp/gdb
        if {
          elglob -0 files ${kernelSources}/*
          forx -E f { $files }
          ln -s $f ./
        }
        if { mkdir -p build }
        cd build
        if {
          forx -E pattern {
            ${kernel.modules}/lib/modules/*/kernel/drivers/net/tun*
            ${kernel.modules}/lib/modules/*/kernel/drivers/net/tap*
          }
          elglob -0 files $pattern
          forx -E f { $files }
          if { cp $f . }
          backtick -E COMPRESSED { basename $f }
          xz -d $COMPRESSED
        }
        elglob -0 GDB_SCRIPT_DIR ${lib.getDev kernel}/lib/modules/*/build/scripts/gdb
        if {
          if { cp -r --no-preserve=all $GDB_SCRIPT_DIR gdb_scripts }
          mv gdb_scripts/linux/constants.py.in gdb_scripts/linux/constants.py
        }
        ${getExe pkgs.gdb}
          -ex "python import sys; sys.path.insert(0, \"''${GDB_SCRIPT_DIR}\")"
          -ex "target remote :1234"
          -ex "source ''${GDB_SCRIPT_DIR}/vmlinux-gdb.py"
          -ex "lx-symbols"
          ${kernel.dev}/vmlinux
      '';

    # Config-only kernel "patch" switching on debug info and gdb support.
    debugKernelPatch = {
      name = "debug";
      patch = null;
      structuredExtraConfig = {
        GDB_SCRIPTS = yes;
        DEBUG_INFO = yes;
        DEBUG_INFO_REDUCED = no;
        # FRAME_POINTER = yes; # "unused option"???
        KALLSYMS = yes;
        KGDB = yes;
      };
    };

    # pkgs.linux with the debug config, left unstripped, and with the
    # generated gdb constants.py copied into the dev output when available.
    debugKernel =
      (pkgs.linux.override (prevArgs: {
        # extraMakeFlags = prevArgs.extraMakeFlags or [ ] ++ [
        #   "scripts_gdb"
        # ];
        kernelPatches = (prevArgs.kernelPatches or [ ]) ++ [ debugKernelPatch ];
      })).overrideAttrs
        (prevAttrs: {
          dontStrip = true;
          postInstall = (prevAttrs.postInstall or "") + ''
            cp "$buildRoot/scripts/gdb/linux/constants.py" $dev/lib/modules/*/build/scripts/gdb/linux/ || echo "$buildRoot/scripts/gdb/linux/constants.py doesn't exist"
          '';
        });

    # The test node. Its `config` is the node's own configuration, while
    # `cfg.runner` still refers to the enclosing system's runner.
    machineModule =
      { config, ... }:
      {
        boot.kernelPackages = pkgs.linuxPackagesFor debugKernel;
        boot.kernelParams = [ "nokaslr" ];
        networking.useNetworkd = true;
        virtualisation.qemu.options = [ "-s" ];

        environment.systemPackages = [
          pkgs.gdb
          package # CH
          cfg.runner
          uvmsPkgs.taps
        ];

        system.build.gdbScript = mkGdbScript config.boot.kernelPackages.kernel;

        systemd.services.taps = {
          wantedBy = [ "multi-user.target" ];
          environment.TAPS_SOCK = "/run/taps/taps.sock";
          serviceConfig = {
            ExecStart = "${getExe uvmsPkgs.taps} serve";
            RuntimeDirectory = "taps";
            UMask = "0007";
            DynamicUser = true;
            NoNewPrivileges = true;
            AmbientCapabilities = [
              "CAP_NET_BIND_SERVICE"
              "CAP_NET_ADMIN"
            ];
          };
        };
      };
  in
  pkgs.testers.runNixOSTest (
    { config, ... }:
    let
      machine = config.nodes.machine;
      kernel = machine.boot.kernelPackages.kernel;
    in
    {
      name = "test-run-${hostName}";

      nodes.machine = machineModule;

      passthru = {
        inherit kernel;
        inherit (machine.system.build) gdbScript;
        kernelSrc = pkgs.srcOnly kernel;
      };

      testScript = ''
        machine.succeed("${getExe cfg.runner}")
      '';
    }
  );
|
|
|
|
# NOTE: Used to be an even uglier bash script, but, for now, execline makes for easier comparisons against spectrum
#
# The runner is an execline wrapper around `supervise-vm`, a Python script
# that starts cloud-hypervisor behind a pre-bound API socket, then creates
# and boots the VM from vm.json via ch-remote and waits for the VMM to exit.
uvms.cloud-hypervisor.runner =
  let
    superviseVm = getExe superviseVm';
    superviseVm' = pkgs.writers.writePython3Bin "supervise-vm" { } ''
      import os
      import subprocess
      from argparse import ArgumentParser
      from contextlib import contextmanager, ExitStack


      parser = ArgumentParser("supervise-vm")
      # Both are needed further down; fail with a usage error when missing
      # instead of a TypeError deep inside preprocess_args/subprocess.
      parser.add_argument("--vm", required=True)
      parser.add_argument("--prefix", default="$HOME/uvms/$VM")
      parser.add_argument("--sock", default="$PREFIX/supervisor.sock")
      parser.add_argument("--vm-config", required=True)

      MSG_SIZE = 16
      ELB_DIR = "${lib.getBin pkgs.execline}/bin"  # noqa: E501
      S6_DIR = "${lib.getBin pkgs.s6}/bin"  # noqa: E501
      CH_DIR = "${lib.getBin package}/bin"  # noqa: E501
      SOCKETBINDER_PATH = S6_DIR + "/s6-ipcserver-socketbinder"  # noqa: E501
      CH_PATH = CH_DIR + "/cloud-hypervisor"
      CHR_PATH = CH_DIR + "/ch-remote"
      TAPS_PATH = "${lib.getExe uvmsPkgs.taps}"  # noqa: E501

      # Child processes get a fixed PATH and only a small allow-list of the
      # caller's environment variables.
      PASSTHRU_PATH = ":".join([ELB_DIR, S6_DIR, CH_DIR])
      PASSTHRU_ENV = {
          **{
              k: v
              for k, v in os.environ.items()
              if k.startswith("RUST")
              or k.startswith("WAYLAND")
              or k in [
                  "TAPS_SOCK",
              ]
          },
          "HOME": os.environ.get("HOME", os.getcwd()),
          "PATH": PASSTHRU_PATH,
      }


      def configure_exec(prefix, vm, check=True, **defaults):
          # Return (exec, execline): subprocess.run wrappers that run in
          # `prefix` with the pass-through environment plus PREFIX and VM.

          def exec(*args, check=check, **kwargs):
              return subprocess.run(
                  [*args],
                  **defaults,
                  env={
                      **PASSTHRU_ENV,
                      "PATH": PASSTHRU_PATH,
                      "PREFIX": prefix,
                      "VM": vm,
                  },
                  check=check,
                  cwd=prefix,
                  **kwargs)

          def execline(*args, check=check, **kwargs):
              # exec() already supplies defaults, env and cwd; passing them
              # here as well makes subprocess.run() receive them twice.
              return exec(
                  "execlineb", "-c", "\n".join(args),
                  executable=ELB_DIR + "/execlineb",
                  check=check,
                  **kwargs)

          return exec, execline


      def preprocess_args(args_mut):
          # Expand $HOME, then $VM, then $PREFIX in every string-valued
          # argument, in place. The order matters: the default --sock
          # contains $PREFIX, whose default contains $HOME and $VM.
          keys = [
              k
              for k, v
              in args_mut._get_kwargs()
              if isinstance(v, str)]
          # Values are looked up lazily because `prefix` is itself
          # rewritten by the earlier passes.
          substitutions = [
              ("$HOME", lambda: PASSTHRU_ENV["HOME"]),
              ("$VM", lambda: args_mut.vm),
              ("$PREFIX", lambda: args_mut.prefix),
          ]
          for placeholder, value in substitutions:
              for k in keys:
                  v = getattr(args_mut, k)
                  if placeholder in v:
                      setattr(args_mut, k, v.replace(placeholder, value()))
          return args_mut


      @contextmanager
      def defer(f):
          try:
              yield
          finally:
              f()


      @contextmanager
      def run_ch(vm_prefix):
          # Start cloud-hypervisor with its API socket pre-bound at
          # vm_prefix/vmm.sock and yield the Popen handle. On exit the
          # process is terminated and the sockets are removed.
          sock_path = vm_prefix + "/vmm.sock"
          args = [
              SOCKETBINDER_PATH,
              "-B",
              sock_path,
              CH_PATH,
              "--api-socket",
              "fd=0",
          ]
          proc = subprocess.Popen(
              args,
              shell=False,
              pass_fds=(2,))
          # Only remove socket files if our process was still running after
          # the grace period; otherwise they were not created by this run
          # or are not ours to delete.
          needs_cleanup = False
          try:
              try:
                  proc.wait(1.0)
              except subprocess.TimeoutExpired:
                  needs_cleanup = True
              # Startup checks live inside the try block so that a failed
              # check still terminates the child below.
              if not os.path.exists(sock_path):
                  raise RuntimeError(f"{sock_path} should exist by now")
              if proc.returncode is not None:
                  raise RuntimeError("CH exited early")
              yield proc
          finally:
              if proc.poll() is None:
                  proc.terminate()  # CH handles SIG{INT,TERM}?
                  proc.wait()
              unlink_paths = [
                  sock_path,
                  vm_prefix + "/vmm.sock.lock",
                  vm_prefix + "/vsock.sock",
              ] if needs_cleanup else []
              for path in unlink_paths:
                  if os.path.exists(path):
                      os.remove(path)


      if __name__ == "__main__":
          args, args_next = parser.parse_known_args()
          preprocess_args(args)

          os.makedirs(args.prefix, exist_ok=True)
          exec, _ = configure_exec(
              prefix=args.prefix,
              vm=args.vm)

          ch_remote = [
              "ch-remote",
              "--api-socket",
              args.prefix + "/vmm.sock",
          ]

          with ExitStack() as cleanup:
              ch = cleanup.enter_context(run_ch(args.prefix))
              exec(*ch_remote, "create", args.vm_config)
              exec(
                  TAPS_PATH, "pass",
                  *ch_remote, "add-net",
                  "id=wan,fd=3,mac=00:00:00:00:00:01")
              exec(*ch_remote, "boot")
              exec(*ch_remote, "info")
              try:
                  ch.wait()
              except KeyboardInterrupt:
                  pass
    '';
  in
  writeElb "run-${hostName}" ''
    ${superviseVm} --vm-config=${chSettingsFile} --vm=${hostName}
  '';
|
|
}
|
|
# Guest-side configuration, applied only when the module is enabled: each
# layer image is mounted under /nix/.ro-stores/<seq> by label, and
# /nix/store is an overlay with those mounts as lower directories.
(lib.mkIf cfg.enable (
  let
    # Mount point of one layer image.
    roStoreDir = img: "/nix/.ro-stores/${toString img.seq}";

    # fileSystems entry (as a name/value pair) for one layer image.
    roStoreMount = img: {
      name = roStoreDir img;
      value = {
        device = "/dev/disk/by-label/${img.label}";
        neededForBoot = true;
        options = [ "x-systemd.device-timeout=5" ];
      };
    };

    storeOverlay = {
      fsType = "overlay";
      overlay.lowerdir = map roStoreDir layers;
      neededForBoot = true;
    };
  in
  {
    boot.initrd.availableKernelModules = [
      "erofs"
      "overlay"
      "virtio_mmio"
      "virtio_pci"
      "virtio_blk"
      # "9pnet_virtio"
      # "9p"
      "virtiofs"
    ];

    boot.initrd.systemd.enable = lib.mkDefault true;

    fileSystems = {
      "/nix/store" = storeOverlay;
    }
    // builtins.listToAttrs (map roStoreMount layers);
  }
))
|
|
];
|
|
}
|