nixos/nix-containers: use SIGTERM to stop containers

systemd-nspawn can react to SIGTERM and send a shutdown signal to the container
init process. use that instead of going through dbus and machined to request
nspawn sending the signal, since during host shutdown machined or dbus may have
gone away by the point a container unit is stopped.

to solve the issue that a container that is still starting cannot be stopped
cleanly we must also handle this signal in containerInit/stage-2.
This commit is contained in:
pennae 2021-04-25 19:36:51 +02:00
parent 265d31bcbd
commit 82931ea446
1 changed files with 17 additions and 11 deletions

View File

@ -35,6 +35,9 @@ let
''
#! ${pkgs.runtimeShell} -e
# Exit early if we're asked to shut down.
trap "exit 0" SIGRTMIN+3
# Initialise the container side of the veth pair.
if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
[ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
@ -60,8 +63,12 @@ let
${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
# Start the regular stage 1 script.
exec "$1"
# Start the regular stage 2 script.
# We source instead of exec to not lose an early stop signal, which is
# also the only _reliable_ shutdown signal we have since early stop
# does not execute ExecStop* commands.
set +e
. "$1"
''
);
@ -127,12 +134,16 @@ let
''}
# Run systemd-nspawn without startup notification (we'll
# wait for the container systemd to signal readiness).
# wait for the container systemd to signal readiness)
# Kill signal handling means systemd-nspawn will pass a system-halt signal
# to the container systemd when it receives SIGTERM for container shutdown;
# containerInit and stage2 have to handle this as well.
exec ${config.systemd.package}/bin/systemd-nspawn \
--keep-unit \
-M "$INSTANCE" -D "$root" $extraFlags \
$EXTRA_NSPAWN_FLAGS \
--notify-ready=yes \
--kill-signal=SIGRTMIN+3 \
--bind-ro=/nix/store \
--bind-ro=/nix/var/nix/db \
--bind-ro=/nix/var/nix/daemon-socket \
@ -259,13 +270,10 @@ let
Slice = "machine.slice";
Delegate = true;
# Hack: we don't want to kill systemd-nspawn, since we call
# "machinectl poweroff" in preStop to shut down the
# container cleanly. But systemd requires sending a signal
# (at least if we want remaining processes to be killed
# after the timeout). So send an ignored signal.
# We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
# signal (SIGRTMIN+3) for the inner container.
KillMode = "mixed";
KillSignal = "WINCH";
KillSignal = "TERM";
DevicePolicy = "closed";
DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
@ -752,8 +760,6 @@ in
postStart = postStartScript dummyConfig;
preStop = "machinectl poweroff $INSTANCE";
restartIfChanged = false;
serviceConfig = serviceDirectives dummyConfig;