Merge pull request #18822 from wlhlm/containers-dev-tun
Allow access to /dev/net/tun inside containers
This commit is contained in:
commit
a8c172ca4b
|
@ -129,6 +129,9 @@ let
|
||||||
--setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
|
--setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
|
||||||
--setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
|
--setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
|
||||||
--setenv PATH="$PATH" \
|
--setenv PATH="$PATH" \
|
||||||
|
${if cfg.additionalCapabilities != null && cfg.additionalCapabilities != [] then
|
||||||
|
# Emit the flag only for a non-empty list; systemd-nspawn expects the names comma-separated.
|
||||||
|
''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'' else ""
|
||||||
|
} \
|
||||||
${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
@ -205,6 +208,41 @@ let
|
||||||
''
|
''
|
||||||
);
|
);
|
||||||
|
|
||||||
|
serviceDirectives = cfg: {
|
||||||
|
ExecReload = pkgs.writeScript "reload-container"
|
||||||
|
''
|
||||||
|
#! ${pkgs.stdenv.shell} -e
|
||||||
|
${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
|
||||||
|
bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
|
||||||
|
'';
|
||||||
|
|
||||||
|
SyslogIdentifier = "container %i";
|
||||||
|
|
||||||
|
EnvironmentFile = "-/etc/containers/%i.conf";
|
||||||
|
|
||||||
|
Type = "notify";
|
||||||
|
|
||||||
|
# Note that on reboot, systemd-nspawn returns 133, so this
|
||||||
|
# unit will be restarted. On poweroff, it returns 0, so the
|
||||||
|
# unit won't be restarted.
|
||||||
|
RestartForceExitStatus = "133";
|
||||||
|
SuccessExitStatus = "133";
|
||||||
|
|
||||||
|
Restart = "on-failure";
|
||||||
|
|
||||||
|
# Hack: we don't want to kill systemd-nspawn, since we call
|
||||||
|
# "machinectl poweroff" in preStop to shut down the
|
||||||
|
# container cleanly. But systemd requires sending a signal
|
||||||
|
# (at least if we want remaining processes to be killed
|
||||||
|
# after the timeout). So send an ignored signal.
|
||||||
|
KillMode = "mixed";
|
||||||
|
KillSignal = "WINCH";
|
||||||
|
|
||||||
|
DevicePolicy = "closed";
|
||||||
|
DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
system = config.nixpkgs.system;
|
system = config.nixpkgs.system;
|
||||||
|
|
||||||
bindMountOpts = { name, config, ... }: {
|
bindMountOpts = { name, config, ... }: {
|
||||||
|
@ -235,6 +273,27 @@ let
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
allowedDeviceOpts = { name, config, ... }: {
|
||||||
|
options = {
|
||||||
|
node = mkOption {
|
||||||
|
example = "/dev/net/tun";
|
||||||
|
type = types.str;
|
||||||
|
description = "Path to device node";
|
||||||
|
};
|
||||||
|
modifier = mkOption {
|
||||||
|
example = "rw";
|
||||||
|
type = types.str;
|
||||||
|
description = ''
|
||||||
|
Device node access modifier. Takes a combination
|
||||||
|
<literal>r</literal> (read), <literal>w</literal> (write), and
|
||||||
|
<literal>m</literal> (mknod). See the
|
||||||
|
<literal>systemd.resource-control(5)</literal> man page for more
|
||||||
|
information.'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
mkBindFlag = d:
|
mkBindFlag = d:
|
||||||
let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
|
let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
|
||||||
mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
|
mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
|
||||||
|
@ -302,6 +361,8 @@ let
|
||||||
dummyConfig =
|
dummyConfig =
|
||||||
{
|
{
|
||||||
extraVeths = {};
|
extraVeths = {};
|
||||||
|
additionalCapabilities = [];
|
||||||
|
allowedDevices = [];
|
||||||
hostAddress = null;
|
hostAddress = null;
|
||||||
hostAddress6 = null;
|
hostAddress6 = null;
|
||||||
localAddress = null;
|
localAddress = null;
|
||||||
|
@ -368,6 +429,26 @@ in
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
additionalCapabilities = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [];
|
||||||
|
example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
|
||||||
|
description = ''
|
||||||
|
Grant additional capabilities to the container. See the
|
||||||
|
capabilities(7) and systemd-nspawn(1) man pages for more
|
||||||
|
information.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
# For declarative containers this is expanded into an allowedDevices entry for
|
||||||
|
# /dev/net/tun ("rw") plus the CAP_NET_ADMIN capability.
|
||||||
|
enableTun = mkOption {
|
||||||
|
type = types.bool;
|
||||||
|
default = false;
|
||||||
|
description = ''
|
||||||
|
Allows the container to create and set up tunnel interfaces
|
||||||
|
by granting the <literal>CAP_NET_ADMIN</literal> capability and
|
||||||
|
enabling access to <literal>/dev/net/tun</literal>.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
privateNetwork = mkOption {
|
privateNetwork = mkOption {
|
||||||
type = types.bool;
|
type = types.bool;
|
||||||
default = false;
|
default = false;
|
||||||
|
@ -422,6 +503,16 @@ in
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Each entry is rendered as "<node> <modifier>" into the unit's DeviceAllow setting.
|
||||||
|
allowedDevices = mkOption {
|
||||||
|
type = types.listOf types.optionSet;
|
||||||
|
options = [ allowedDeviceOpts ];
|
||||||
|
default = [];
|
||||||
|
example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
|
||||||
|
description = ''
|
||||||
|
A list of device nodes to which the container has access.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
} // networkOptions;
|
} // networkOptions;
|
||||||
|
|
||||||
config = mkMerge
|
config = mkMerge
|
||||||
|
@ -488,59 +579,39 @@ in
|
||||||
|
|
||||||
restartIfChanged = false;
|
restartIfChanged = false;
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = serviceDirectives dummyConfig;
|
||||||
ExecReload = pkgs.writeScript "reload-container"
|
|
||||||
''
|
|
||||||
#! ${pkgs.stdenv.shell} -e
|
|
||||||
${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
|
|
||||||
bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
|
|
||||||
'';
|
|
||||||
|
|
||||||
SyslogIdentifier = "container %i";
|
|
||||||
|
|
||||||
EnvironmentFile = "-/etc/containers/%i.conf";
|
|
||||||
|
|
||||||
Type = "notify";
|
|
||||||
|
|
||||||
# Note that on reboot, systemd-nspawn returns 133, so this
|
|
||||||
# unit will be restarted. On poweroff, it returns 0, so the
|
|
||||||
# unit won't be restarted.
|
|
||||||
RestartForceExitStatus = "133";
|
|
||||||
SuccessExitStatus = "133";
|
|
||||||
|
|
||||||
Restart = "on-failure";
|
|
||||||
|
|
||||||
# Hack: we don't want to kill systemd-nspawn, since we call
|
|
||||||
# "machinectl poweroff" in preStop to shut down the
|
|
||||||
# container cleanly. But systemd requires sending a signal
|
|
||||||
# (at least if we want remaining processes to be killed
|
|
||||||
# after the timeout). So send an ignored signal.
|
|
||||||
KillMode = "mixed";
|
|
||||||
KillSignal = "WINCH";
|
|
||||||
|
|
||||||
DevicePolicy = "closed";
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
in {
|
in {
|
||||||
systemd.services = listToAttrs (filter (x: x.value != null) (
|
systemd.services = listToAttrs (filter (x: x.value != null) (
|
||||||
# The generic container template used by imperative containers
|
# The generic container template used by imperative containers
|
||||||
[{ name = "container@"; value = unit; }]
|
[{ name = "container@"; value = unit; }]
|
||||||
# declarative containers
|
# declarative containers
|
||||||
++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (
|
++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
|
||||||
unit // {
|
config = cfg // (
|
||||||
preStart = preStartScript cfg;
|
if cfg.enableTun then
|
||||||
script = startScript cfg;
|
{
|
||||||
postStart = postStartScript cfg;
|
allowedDevices = cfg.allowedDevices
|
||||||
} // (
|
++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
|
||||||
if cfg.autoStart then
|
additionalCapabilities = cfg.additionalCapabilities
|
||||||
{
|
++ [ "CAP_NET_ADMIN" ];
|
||||||
wantedBy = [ "multi-user.target" ];
|
}
|
||||||
wants = [ "network.target" ];
|
else {});
|
||||||
after = [ "network.target" ];
|
in
|
||||||
restartTriggers = [ cfg.path ];
|
unit // {
|
||||||
reloadIfChanged = true;
|
preStart = preStartScript config;
|
||||||
}
|
script = startScript config;
|
||||||
else {})
|
postStart = postStartScript config;
|
||||||
|
serviceConfig = serviceDirectives config;
|
||||||
|
} // (
|
||||||
|
if config.autoStart then
|
||||||
|
{
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
wants = [ "network.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
restartTriggers = [ config.path ];
|
||||||
|
reloadIfChanged = true;
|
||||||
|
}
|
||||||
|
else {})
|
||||||
)) config.containers)
|
)) config.containers)
|
||||||
));
|
));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue