nixos-container: Fix `destroy` terminating before it's done. Fixes #32545.

This also fixes the race condition found in #32551.

And it fixes nixops's repeated destroy/deploy being broken
(https://github.com/NixOS/nixops/issues/809).
This commit is contained in:
Niklas Hambüchen 2017-12-11 04:05:15 +01:00
parent 857a71cbc5
commit 5d83988c1e
1 changed file with 25 additions and 15 deletions

View File

@ -7,6 +7,7 @@ use File::Slurp;
use Fcntl ':flock'; use Fcntl ':flock';
use Getopt::Long qw(:config gnu_getopt); use Getopt::Long qw(:config gnu_getopt);
use Cwd 'abs_path'; use Cwd 'abs_path';
use Time::HiRes;
my $nsenter = "@utillinux@/bin/nsenter"; my $nsenter = "@utillinux@/bin/nsenter";
my $su = "@su@"; my $su = "@su@";
@ -214,21 +215,6 @@ if (!-e $confFile) {
die "$0: container $containerName does not exist\n" ; die "$0: container $containerName does not exist\n" ;
} }
# Report whether the container's systemd unit is currently active.
# Runs `systemctl show` on the unit "container@<name>" and returns the
# result of matching its output against "ActiveState=active".
sub isContainerRunning {
    my $unitProperties = qx{systemctl show 'container\@$containerName'};
    return $unitProperties =~ m{ActiveState=active};
}
# Ask machinectl to terminate the container.
# Dies if the machinectl invocation does not exit with status 0.
sub terminateContainer {
    my @terminateCmd = ("machinectl", "terminate", $containerName);
    my $exitStatus = system(@terminateCmd);
    $exitStatus == 0
        or die "$0: failed to terminate container\n";
}
# Stop the container by stopping its systemd unit "container@<name>".
# Dies if systemctl does not exit with status 0.
sub stopContainer {
    my $unitName = "container\@$containerName";
    my $exitStatus = system("systemctl", "stop", $unitName);
    $exitStatus == 0
        or die "$0: failed to stop container\n";
}
# Return the PID of the init process of the container. # Return the PID of the init process of the container.
sub getLeader { sub getLeader {
my $s = `machinectl show "$containerName" -p Leader`; my $s = `machinectl show "$containerName" -p Leader`;
@ -237,6 +223,30 @@ sub getLeader {
return int($1); return int($1);
} }
# Report whether the container's systemd unit is currently active.
# Runs `systemctl show` on the unit "container@<name>" and returns the
# result of matching its output against "ActiveState=active".
sub isContainerRunning {
    my $unitProperties = qx{systemctl show 'container\@$containerName'};
    return $unitProperties =~ m{ActiveState=active};
}
# Terminate the container via machinectl and block until its leader
# process is gone. Dies if machinectl does not exit with status 0.
sub terminateContainer {
    # Look up the leader PID before requesting termination, so that there
    # is a process to poll for afterwards.
    my $leaderPid = getLeader();

    my @terminateCmd = ("machinectl", "terminate", $containerName);
    my $exitStatus = system(@terminateCmd);
    $exitStatus == 0
        or die "$0: failed to terminate container\n";

    # Wait for the leader process to exit.
    # TODO: As with any use of PIDs for process control where the process
    #       is not a direct child of ours, this can go wrong when the PID
    #       gets recycled after a PID overflow.
    #       Relying entirely on some form of UUID provided by machinectl
    #       instead of PIDs would remove this risk.
    #       See https://github.com/NixOS/nixpkgs/pull/32992#discussion_r158586048
    # Signal 0 delivers nothing; it only probes whether the PID can still
    # be signalled. Poll every 100ms until that probe fails.
    Time::HiRes::sleep(0.1) while kill(0, $leaderPid);
}
# Stop the container by stopping its systemd unit "container@<name>".
# Dies if systemctl does not exit with status 0.
sub stopContainer {
    my $unitName = "container\@$containerName";
    my $exitStatus = system("systemctl", "stop", $unitName);
    $exitStatus == 0
        or die "$0: failed to stop container\n";
}
# Run a command in the container. # Run a command in the container.
sub runInContainer { sub runInContainer {
my @args = @_; my @args = @_;