uvms-guest: select() to detect dead subprocesses
This commit is contained in:
parent
dd6609ba3f
commit
00ae88e8ad
3 changed files with 127 additions and 70 deletions
|
|
@ -3,74 +3,117 @@ import os
|
||||||
import select
|
import select
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
def handle_run(run: dict) -> dict:
|
class Processes:
|
||||||
res = {}
|
def __init__(self):
|
||||||
text = run.get("text", False)
|
self.processes = []
|
||||||
env = {
|
self.sources = []
|
||||||
**os.environ,
|
self.liveness_fds = dict()
|
||||||
"PATH": ":".join(
|
self.client_fds = set()
|
||||||
os.environ.get("PATH", "").split(":") + run.get("EXTRA_PATH", [])
|
|
||||||
),
|
def popen(self, *args, **kwargs):
|
||||||
}
|
a, b = socket.socketpair()
|
||||||
proc = None
|
pass_fds = [*kwargs.get("pass_fds", ()), b.fileno()]
|
||||||
try:
|
proc = subprocess.Popen(*args, **kwargs, pass_fds=pass_fds)
|
||||||
proc = subprocess.Popen(
|
self.processes.append(proc)
|
||||||
req["run"]["argv"],
|
self.sources.append(a)
|
||||||
text=text,
|
assert a.fileno() not in self.liveness_fds
|
||||||
env=env,
|
self.liveness_fds[a.fileno()] = proc
|
||||||
cwd="/home/user",
|
b.close()
|
||||||
stdin=subprocess.PIPE,
|
return proc
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
)
|
def handle_run(self, run: dict) -> dict:
|
||||||
res["status"] = "exec succeeded"
|
res = {}
|
||||||
except Exception as e:
|
text = run.get("text", False)
|
||||||
res["status"] = "exec failed"
|
env = {
|
||||||
res["exception"] = repr(e)
|
**os.environ,
|
||||||
res["pid"] = getattr(proc, "pid", None)
|
"PATH": ":".join(
|
||||||
try:
|
[
|
||||||
if proc is not None:
|
*os.environ.get("PATH", "").split(":"),
|
||||||
proc.wait(0.125)
|
*run.get(
|
||||||
res["long_running"] = False
|
"EXTRA_PATH",
|
||||||
res["returncode"] = getattr(proc, "returncode", None)
|
[],
|
||||||
except subprocess.TimeoutExpired:
|
),
|
||||||
res["long_running"] = True
|
],
|
||||||
return res, proc
|
),
|
||||||
|
}
|
||||||
|
proc = None
|
||||||
|
try:
|
||||||
|
proc = self.popen(
|
||||||
|
req["run"]["argv"],
|
||||||
|
text=text,
|
||||||
|
env=env,
|
||||||
|
cwd="/home/user",
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
)
|
||||||
|
res["status"] = "exec succeeded"
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
res["status"] = "exec failed"
|
||||||
|
res["exception"] = repr(e)
|
||||||
|
res["pid"] = getattr(proc, "pid", None)
|
||||||
|
try:
|
||||||
|
if proc is not None:
|
||||||
|
proc.wait(0.125)
|
||||||
|
res["long_running"] = False
|
||||||
|
res["returncode"] = getattr(proc, "returncode", None)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
res["long_running"] = True
|
||||||
|
return res, proc
|
||||||
|
|
||||||
|
def accept_vsock(self, s):
|
||||||
|
con, (cid, port) = serv.accept()
|
||||||
|
assert cid == 2, cid
|
||||||
|
self.sources.append(con)
|
||||||
|
self.client_fds.insert(con.fileno())
|
||||||
|
return con, (cid, port)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
ps = Processes()
|
||||||
serv = socket.fromfd(3, socket.AF_VSOCK, socket.SOCK_STREAM)
|
serv = socket.fromfd(3, socket.AF_VSOCK, socket.SOCK_STREAM)
|
||||||
|
ps.sources.append(serv)
|
||||||
procs = []
|
|
||||||
conns = [serv]
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
rr, rw, xs = select.select(conns, [], [])
|
rr, rw, xs = select.select(ps.sources, [], ps.sources)
|
||||||
|
|
||||||
|
for con in (*rr, *xs):
|
||||||
|
if con.fileno() in ps.liveness_fds:
|
||||||
|
assert con.recv(128) == b""
|
||||||
|
proc = ps.liveness_fds[con.fileno()]
|
||||||
|
proc.wait()
|
||||||
|
assert proc.returncode is not None, proc
|
||||||
|
print(f"{proc} has terminated, shutting down")
|
||||||
|
sys.exit(proc.returncode)
|
||||||
for con in rr:
|
for con in rr:
|
||||||
if con is serv:
|
if con is serv:
|
||||||
con, (cid, port) = serv.accept()
|
con, _ = ps.accept_vsock(serv)
|
||||||
assert cid == 2, cid
|
print(f"Open [{con.fileno()}]")
|
||||||
conns.append(con)
|
if con.fileno() in ps.liveness_fds:
|
||||||
continue
|
assert False, "Must already be processed"
|
||||||
req = con.recv(8192)
|
elif con.fileno() in ps.client_fds:
|
||||||
# IDK why but I keep getting empty messages
|
req = con.recv(8192)
|
||||||
if req == b"":
|
# IDK why but I keep getting empty messages
|
||||||
continue
|
if req == b"":
|
||||||
try:
|
print(f"Lost [{con.fileno()}]")
|
||||||
req = json.loads(req)
|
continue
|
||||||
print(f"Received {req=}")
|
try:
|
||||||
except json.JSONDecodeError as e:
|
req = json.loads(req)
|
||||||
print(f"Couldn't interpret {req=}: {e}")
|
print(f"Received {req=}")
|
||||||
continue
|
except json.JSONDecodeError as e:
|
||||||
if "run" in req:
|
print(f"Couldn't interpret {req=}: {e}")
|
||||||
res, proc = handle_run(req["run"])
|
continue
|
||||||
procs.append(proc)
|
if "run" in req:
|
||||||
|
res, proc = ps.handle_run(req["run"])
|
||||||
|
else:
|
||||||
|
res = {"status": "unknown command"}
|
||||||
|
_, rw, _ = select.select([], [con], [])
|
||||||
|
assert rw, rw
|
||||||
|
res = json.dumps(res).encode("utf8")
|
||||||
|
print(f"Responding with {res=}")
|
||||||
|
con.send(res)
|
||||||
else:
|
else:
|
||||||
res = {"status": "unknown command"}
|
assert False, con.fileno()
|
||||||
_, rw, _ = select.select([], [con], [])
|
|
||||||
assert rw, rw
|
|
||||||
res = json.dumps(res).encode("utf8")
|
|
||||||
print(f"Responding with {res=}")
|
|
||||||
con.send(res)
|
|
||||||
|
|
|
||||||
|
|
@ -452,18 +452,21 @@ def removing(*paths):
|
||||||
os.remove(p)
|
os.remove(p)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
def connect_ch_vsock(
|
def connect_ch_vsock(
|
||||||
vsock_sock_path,
|
vsock_sock_path,
|
||||||
port: int,
|
port: int,
|
||||||
type=socket.SOCK_STREAM,
|
type=socket.SOCK_STREAM,
|
||||||
blocking=True,
|
blocking=True,
|
||||||
) -> socket.socket:
|
) -> socket.socket:
|
||||||
|
os.makedirs(os.path.dirname(vsock_sock_path), exist_ok=True)
|
||||||
s = socket.socket(socket.AF_UNIX, type, 0)
|
s = socket.socket(socket.AF_UNIX, type, 0)
|
||||||
s.setblocking(blocking)
|
s.setblocking(blocking)
|
||||||
s.connect(vsock_sock_path)
|
s.connect(vsock_sock_path)
|
||||||
|
|
||||||
s.send(b"CONNECT %d\n" % port)
|
with removing(vsock_sock_path):
|
||||||
return s
|
s.send(b"CONNECT %d\n" % port)
|
||||||
|
yield s
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
|
|
@ -473,15 +476,14 @@ def listen_ch_vsock(
|
||||||
type=socket.SOCK_STREAM,
|
type=socket.SOCK_STREAM,
|
||||||
blocking=True,
|
blocking=True,
|
||||||
) -> socket.socket:
|
) -> socket.socket:
|
||||||
|
os.makedirs(os.path.dirname(vsock_sock_path), exist_ok=True)
|
||||||
listen_path = vsock_sock_path + "_%d" % port
|
listen_path = vsock_sock_path + "_%d" % port
|
||||||
s = socket.socket(socket.AF_UNIX, type, 0)
|
s = socket.socket(socket.AF_UNIX, type, 0)
|
||||||
s.setblocking(blocking)
|
s.setblocking(blocking)
|
||||||
s.bind(listen_path)
|
s.bind(listen_path)
|
||||||
s.listen()
|
s.listen()
|
||||||
try:
|
with removing(listen_path):
|
||||||
yield s
|
yield s
|
||||||
finally:
|
|
||||||
os.remove(listen_path)
|
|
||||||
|
|
||||||
|
|
||||||
def main(args, args_next, cleanup, ps):
|
def main(args, args_next, cleanup, ps):
|
||||||
|
|
@ -589,7 +591,7 @@ def main(args, args_next, cleanup, ps):
|
||||||
ps.exec(*ch_remote, "info")
|
ps.exec(*ch_remote, "info")
|
||||||
|
|
||||||
with ready_sock:
|
with ready_sock:
|
||||||
ready_sock.settimeout(16.0)
|
ready_sock.settimeout(20.0)
|
||||||
try:
|
try:
|
||||||
con, _ = ready_sock.accept()
|
con, _ = ready_sock.accept()
|
||||||
except: # noqa: E722
|
except: # noqa: E722
|
||||||
|
|
|
||||||
|
|
@ -39,9 +39,9 @@ in
|
||||||
./on-failure.nix
|
./on-failure.nix
|
||||||
];
|
];
|
||||||
config = {
|
config = {
|
||||||
some.failure-handler.enable = true;
|
# some.failure-handler.enable = true;
|
||||||
hardware.graphics.enable = true;
|
hardware.graphics.enable = true;
|
||||||
# boot.kernelPackages = pkgs.linuxPackagesFor uvmsPkgs.linux-uvm;
|
boot.kernelPackages = pkgs.linuxPackagesFor uvmsPkgs.linux-uvm;
|
||||||
# boot.isContainer = true;
|
# boot.isContainer = true;
|
||||||
boot.initrd.kernelModules = [
|
boot.initrd.kernelModules = [
|
||||||
"drm"
|
"drm"
|
||||||
|
|
@ -256,14 +256,26 @@ in
|
||||||
partOf = [ "uvms-guest.service" ];
|
partOf = [ "uvms-guest.service" ];
|
||||||
};
|
};
|
||||||
systemd.services."uvms-guest" = {
|
systemd.services."uvms-guest" = {
|
||||||
|
requiredBy = [ "multi-user.target" ];
|
||||||
|
onFailure = [ "shutdown.service" ];
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
User = "user";
|
User = "user";
|
||||||
Group = "users";
|
Group = "users";
|
||||||
ExecStart = "${lib.getExe uvmsPkgs.uvms-guest}";
|
ExecStart = "${lib.getExe uvmsPkgs.uvms-guest}";
|
||||||
|
ExecStop = [
|
||||||
|
"/run/current-system/sw/bin/echo GUEST DOWN"
|
||||||
|
"/run/current-system/sw/bin/systemctl poweroff"
|
||||||
|
];
|
||||||
|
StandardOutput = "journal+console";
|
||||||
|
StandardError = "journal+console";
|
||||||
|
Restart = "no";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
systemd.services."shutdown" = {
|
||||||
|
serviceConfig = {
|
||||||
|
ExecStart = [ "/run/current-system/sw/bin/systemctl poweroff" ];
|
||||||
StandardOutput = "journal+console";
|
StandardOutput = "journal+console";
|
||||||
StandardError = "journal+console";
|
StandardError = "journal+console";
|
||||||
Restart = "on-failure";
|
|
||||||
RestartSec = 5;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -371,7 +383,7 @@ in
|
||||||
options = {
|
options = {
|
||||||
size = mkOption {
|
size = mkOption {
|
||||||
type = types.int;
|
type = types.int;
|
||||||
default = 1536 * 1048576;
|
default = 3 * 1024 * 1048576;
|
||||||
};
|
};
|
||||||
shared = mkOption {
|
shared = mkOption {
|
||||||
type = types.bool;
|
type = types.bool;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue