From 0bd986f97fcdc0235bf1854998c16cd51336e256 Mon Sep 17 00:00:00 2001
From: Val Packett
Date: Thu, 4 Dec 2025 06:59:50 -0300
Subject: [PATCH] Introduce micro-activate (RIIR activate script + tiny bit of tmpfiles)

Instead of interpreting all that shell and running actual tmpfiles,
use a tiny stage before systemd that mounts a tmpfs at /run (preventing
systemd from doing the same), populates it with NixOS symlinks and
preserved resolv.conf, and mounts the immutable /etc overlay before
passing control over to systemd.
---
 .gitignore                 |   2 +
 micro-activate.rs          | 137 +++++++++++++++++++++++++++++++++++++
 munix                      |  27 +++-----
 nixosModules/default.nix   |  21 -------
 packages/munix/default.nix |  17 ++++-
 5 files changed, 164 insertions(+), 40 deletions(-)
 create mode 100644 micro-activate.rs

diff --git a/.gitignore b/.gitignore
index 9cb78cc..90acb92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 result
 /testvm*
+/target
+/micro-activate
 .direnv/
diff --git a/micro-activate.rs b/micro-activate.rs
new file mode 100644
index 0000000..73d7e3c
--- /dev/null
+++ b/micro-activate.rs
@@ -0,0 +1,137 @@
+//! Minimal pre-systemd activation stage for munix guests.
+//!
+//! Mounts a fresh tmpfs on `/run`, restores the files passed in by the launcher, creates the
+//! NixOS symlinks, mounts the immutable `/etc` overlay and finally execs the real init.
+
+use std::ffi::{CStr, CString};
+use std::os::raw::{c_char, c_int, c_ulong, c_void};
+use std::os::unix::ffi::OsStrExt;
+use std::os::unix::process::CommandExt;
+
+// Mount flags, with the values Linux defines in <sys/mount.h>.
+const MS_RDONLY: c_ulong = 0x01;
+const MS_NOSUID: c_ulong = 0x02;
+const MS_NODEV: c_ulong = 0x04;
+const MS_RELATIME: c_ulong = 0x200000;
+const MS_STRICTATIME: c_ulong = 0x1000000;
+
+unsafe extern "C" {
+    fn mount(
+        src: *const c_char,
+        target: *const c_char,
+        fstype: *const c_char,
+        flags: c_ulong,
+        data: *const c_void,
+    ) -> c_int;
+}
+
+/// Checked wrapper around `mount(2)`.
+///
+/// Taking `&CStr` guarantees that every string handed to the kernel is NUL-terminated.
+/// On failure the returned error carries `errno` plus the filesystem type and target.
+fn mount_fs(
+    src: &CStr,
+    target: &CStr,
+    fstype: &CStr,
+    flags: c_ulong,
+    data: Option<&CStr>,
+) -> Result<(), std::io::Error> {
+    let data = data.map_or(std::ptr::null(), |d| d.as_ptr().cast::<c_void>());
+    // SAFETY: all pointers are derived from `&CStr`s that outlive the call (or are null,
+    // which mount(2) accepts for `data`), and mount(2) does not retain them.
+    if unsafe { mount(src.as_ptr(), target.as_ptr(), fstype.as_ptr(), flags, data) } == 0 {
+        return Ok(());
+    }
+    let err = std::io::Error::last_os_error();
+    Err(std::io::Error::new(
+        err.kind(),
+        format!("mounting {fstype:?} on {target:?} failed: {err}"),
+    ))
+}
+
+/// Parses one tmpfiles.d line, returning `(link path, link target)` for symlink entries.
+///
+/// Fields are `Type Path Mode User Group Age Argument`; only types starting with `L`
+/// are accepted, anything else (or a line with too few fields) yields `None`.
+fn parse_tmpfiles_line(line: &str) -> Option<(&str, &str)> {
+    // NOTE: does not support actual whitespace inside quotes
+    // (that's not gonna appear in these files we parse)
+    let mut fields = line.split_whitespace().map(|s| s.trim_matches('\''));
+    if !fields.next()?.starts_with('L') {
+        return None;
+    }
+    let link = fields.next()?;
+    // Skip Mode, User, Group and Age; the fifth remaining field is the Argument.
+    let target = fields.nth(4)?;
+    Some((link, target))
+}
+
+/// Creates every symlink described by `L` lines in the given tmpfiles.d file contents.
+fn link_tmpfiles(contents: &[u8]) -> Result<(), std::io::Error> {
+    let text = str::from_utf8(contents)
+        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+    for (link, target) in text.lines().filter_map(parse_tmpfiles_line) {
+        std::os::unix::fs::symlink(target, link)?;
+    }
+    Ok(())
+}
+
+fn main() -> Result<(), std::io::Error> {
+    let closure = std::env::var("MICROVM_CLOSURE").expect("MICROVM_CLOSURE must be set");
+
+    // systemd really wants /run to be a mountpoint and will mount a tmpfs on its own
+    // if it's not already a mountpoint. Well, it's correct: reaching into virtiofs
+    // (which is what not-mounting would entail) for /run stuff is not great.
+    //
+    // Let's preserve the fixed passed-in files and set up the NixOS symlinks in the new mount.
+    let resolv_conf = std::fs::read("/run/resolv.conf")?;
+    let machine_id = std::fs::read("/run/machine-id")?;
+    mount_fs(c"tmpfs", c"/run", c"tmpfs", MS_NOSUID | MS_NODEV | MS_STRICTATIME, None)?;
+    std::fs::write("/run/resolv.conf", &resolv_conf)?;
+    std::fs::write("/run/machine-id", &machine_id)?;
+    std::os::unix::fs::symlink(&closure, "/run/current-system")?;
+    if let Ok(tmp_graphics) =
+        std::fs::read(format!("{closure}/etc/tmpfiles.d/graphics-driver.conf"))
+    {
+        link_tmpfiles(&tmp_graphics)?;
+    } else {
+        eprintln!("[micro-activate] Could not find the closure's graphics-driver.conf!");
+    }
+
+    // We need the /etc metadata overlay not just for abstract correctness, but even just to
+    // allow the regular user to run systemctl (it doesn't like passwd being owned by non-root)..
+    let metadata_img = std::fs::read_link(format!("{closure}/etc-metadata-image"))
+        .expect("The closure must use an immutable /etc overlay!");
+    let basedir = std::fs::read_link(format!("{closure}/etc-basedir"))
+        .expect("The closure must use an immutable /etc overlay!");
+    // mount(2) needs NUL-terminated strings; a raw pointer into the path's bytes is not one.
+    let metadata_img = CString::new(metadata_img.as_os_str().as_bytes())
+        .expect("paths returned by readlink(2) cannot contain NUL bytes");
+    let overlay_opts = CString::new(format!(
+        "redirect_dir=on,metacopy=on,lowerdir=/run/etc.meta::{}",
+        basedir.display()
+    ))
+    .expect("paths returned by readlink(2) cannot contain NUL bytes");
+    std::fs::create_dir("/run/etc.meta")?;
+    std::fs::remove_file("/etc")?;
+    std::fs::create_dir("/etc")?;
+    mount_fs(
+        &metadata_img,
+        c"/run/etc.meta",
+        c"erofs",
+        MS_RDONLY | MS_NODEV | MS_NOSUID,
+        None,
+    )?;
+    mount_fs(
+        c"overlay",
+        c"/etc",
+        c"overlay",
+        MS_NODEV | MS_NOSUID | MS_RELATIME,
+        Some(overlay_opts.as_c_str()),
+    )?;
+
+    // exec() only returns on failure, so whatever it returns is the error to report.
+    let mut args = std::env::args_os().skip(1);
+    let cmd = args.next().expect("usage: micro-activate <init> [args...]");
+    Err(std::process::Command::new(cmd).args(args).exec())
+}
diff --git a/munix b/munix
index 2cf6257..4bdf635 100755
--- a/munix
+++ b/munix
@@ -180,38 +180,31 @@ BWRAP_ARGS+=(
 bwrap --unshare-all --share-net \
 	--uid $MICROVM_UID --gid $MICROVM_GID \
 	--tmpfs / \
-	--dir /run --dir /var --symlink /run /var/run --dir /tmp --dir /mnt \
+	--dir /run --dir /var --symlink /run /var/run --dir /tmp --dir /mnt --dir /bin --dir /usr/bin \
 	--proc /proc --ro-bind /sys /sys \
 	--dev /dev --dir /dev/input --dev-bind /dev/kvm /dev/kvm \
 	--ro-bind "$MUVM_PATH" /run/munix/muvm \
 	--ro-bind "$PASST_PATH" /run/munix/passt \
+	--ro-bind "$SCRIPT_PATH/micro-activate" /opt/bin/micro-activate \
 	--ro-bind "$MUVM_PATH/muvm-guest" /opt/bin/muvm-remote \
 	--ro-bind "$MUVM_PATH/muvm-guest" /opt/bin/muvm-configure-network \
 	--ro-bind "$MUVM_PATH/muvm-guest" /opt/bin/muvm-pwbridge \
-	--symlink "$MICROVM_CLOSURE/etc/systemd" /etc/systemd \
+	--symlink "$MICROVM_CLOSURE/etc" /etc \
+	--symlink "$MICROVM_CLOSURE/sw/bin/sh" /bin/sh \
+	--symlink "$MICROVM_CLOSURE/sw/bin/env" /usr/bin/env \
+	--symlink "$MICROVM_CLOSURE" /run/current-system \
 	--ro-bind /nix/store /nix/store \
-	--ro-bind /run/systemd/resolve /run/systemd/resolve \
-	--file 11 /etc/passwd \
-	--file 12 /etc/group \
-	--file 13 /etc/resolv.conf \
+	--file 12 /run/machine-id \
+	--file 13 /run/resolv.conf \
 	--dir "$XDG_RUNTIME_DIR" \
 	--setenv PATH "/run/munix/muvm:/run/munix/passt:$MICROVM_CLOSURE/sw/bin" \
 	"${BWRAP_ARGS[@]}" \
 	muvm \
-	--custom-init-cmdline "$MICROVM_CLOSURE/sw/sbin/init --log-target=console systemd.set_credential=sidebus.port:50000" \
+	--custom-init-cmdline "/opt/bin/micro-activate $MICROVM_CLOSURE/sw/sbin/init --log-target=console systemd.set_credential=sidebus.port:50000" \
 	"${MUVM_ARGS[@]}" \
 	-e container=munix \
 	-e MICROVM_CLOSURE="$MICROVM_CLOSURE" \
 	-e MICROVM_UID="$MICROVM_UID" -e MICROVM_GID="$MICROVM_GID" \
 	-i -t "${MICROVM_COMMAND[@]}" \
-	11< <(cat <