diff --git a/nixos/doc/manual/release-notes/rl-1903.xml b/nixos/doc/manual/release-notes/rl-1903.xml
index bbd3cf2e9db..7c94f6e9473 100644
--- a/nixos/doc/manual/release-notes/rl-1903.xml
+++ b/nixos/doc/manual/release-notes/rl-1903.xml
@@ -68,6 +68,17 @@
for details.
+
+
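+ There is now a set of <option>confinement</option> options for
+ <option>systemd.services</option>, which allows restricting services
+ to a
+ <citerefentry><refentrytitle>chroot</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry>
+ environment that only contains the store paths from
+ the runtime closure of the service.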
+
+
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index fad7f336c99..374e39f553f 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -172,6 +172,7 @@
./security/rtkit.nix
./security/wrappers/default.nix
./security/sudo.nix
+ ./security/systemd-confinement.nix
./services/admin/oxidized.nix
./services/admin/salt/master.nix
./services/admin/salt/minion.nix
diff --git a/nixos/modules/security/systemd-confinement.nix b/nixos/modules/security/systemd-confinement.nix
new file mode 100644
index 00000000000..cd4eb81dbe1
--- /dev/null
+++ b/nixos/modules/security/systemd-confinement.nix
@@ -0,0 +1,199 @@
+{ config, pkgs, lib, ... }:
+
+let
+ toplevelConfig = config;
+ inherit (lib) types;
+ inherit (import ../system/boot/systemd-lib.nix {
+ inherit config pkgs lib;
+ }) mkPathSafeName;
+in {
+ options.systemd.services = lib.mkOption {
+ type = types.attrsOf (types.submodule ({ name, config, ... }: {
+ # Per-service switch; the config section below acts only when this is true.
+ options.confinement.enable = lib.mkOption {
+   type = types.bool;
+   default = false;
+   description = ''
+     If set, all the required runtime store paths for this service are
+     bind-mounted into a tmpfs-based
+     <citerefentry><refentrytitle>chroot</refentrytitle>
+     <manvolnum>2</manvolnum></citerefentry>.
+   '';
+ };
+
+ # Opt-in switch: use the whole unit file instead of the Exec* commands as closure root.
+ options.confinement.fullUnit = lib.mkOption {
+   type = types.bool;
+   default = false;
+   description = ''
+     Whether to include the full closure of the systemd unit file in the
+     chroot, instead of just the dependencies of the executables.
+
+     While it may be tempting to just enable this option to make things work
+     quickly, please be aware that this might add paths to the closure of the
+     chroot that you didn't anticipate. It's better to explicitly add extra
+     store paths to the chroot via <option>confinement.packages</option>.
+   '';
+ };
+
+ # Extra closure roots for the chroot; the config section adds the Exec* commands and /bin/sh.
+ options.confinement.packages = lib.mkOption {
+   type = types.listOf (types.either types.str types.package);
+   default = [];
+   description = let
+     # Render a serviceConfig key as an option reference for the manual.
+     mkScOption = optName: "<option>serviceConfig.${optName}</option>";
+   in ''
+     Additional packages or strings with context to add to the closure of
+     the chroot. By default, this includes all the packages from the
+     ${lib.concatMapStringsSep ", " mkScOption [
+       "ExecReload" "ExecStartPost" "ExecStartPre" "ExecStop" "ExecStopPost"
+     ]} and ${mkScOption "ExecStart"} options. If you want to have all the
+     dependencies of this systemd unit, you can use
+     <option>confinement.fullUnit</option>.
+
+     The store paths listed in <option>path</option> are not included in
+     the closure, and neither are paths from other options except those
+     listed above.
+   '';
+ };
+
+ # Program exposed as /bin/sh inside the chroot; null means no /bin/sh is bind-mounted.
+ options.confinement.binSh = lib.mkOption {
+   type = types.nullOr types.path;
+   default = toplevelConfig.environment.binsh;
+   defaultText = "config.environment.binsh";
+   example = lib.literalExample "\${pkgs.dash}/bin/dash";
+   description = ''
+     The program to make available as /bin/sh inside
+     the chroot. If this is set to null, no
+     /bin/sh is provided at all.
+
+     This is useful for some applications, which for example use the
+     <citerefentry><refentrytitle>system</refentrytitle>
+     <manvolnum>3</manvolnum></citerefentry>
+     library function to execute commands.
+   '';
+ };
+
+ # Selects which defaults the config section derives for the API-VFS/Private*/Protect* flags.
+ options.confinement.mode = lib.mkOption {
+   type = types.enum [ "full-apivfs" "chroot-only" ];
+   default = "full-apivfs";
+   description = ''
+     The value full-apivfs (the default) sets up
+     private /dev, /proc, /sys and /tmp file systems in a separate user
+     name space.
+
+     If this is set to chroot-only, only the file
+     system name space is set up along with the call to
+     <citerefentry><refentrytitle>chroot</refentrytitle>
+     <manvolnum>2</manvolnum></citerefentry>.
+
+     This doesn't cover network namespaces and is solely for
+     file system level isolation.
+   '';
+ };
+
+ # Unit settings that are merged into the service only when
+ # confinement.enable is set.
+ config = let
+   inherit (config) confinement;
+   chrootName = "${mkPathSafeName name}-chroot";
+
+   # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt to
+   # switch some of the flags below on by default. "chroot-only" expects no
+   # /dev, /proc or /sys inside the chroot, so such a default would mount
+   # them there; hence every flag gets an explicit true/false derived from
+   # the mode, which also keeps the attack surface from changing silently.
+   apivfsFlags = [
+     "MountAPIVFS" "PrivateDevices" "PrivateTmp" "PrivateUsers"
+     "ProtectControlGroups" "ProtectKernelModules" "ProtectKernelTunables"
+   ];
+   # mkDefault still lets a service override individual flags.
+   apivfsDefault = lib.mkDefault (confinement.mode == "full-apivfs");
+
+   execOpts = [
+     "ExecReload" "ExecStart" "ExecStartPost" "ExecStartPre" "ExecStop"
+     "ExecStopPost"
+   ];
+   hasOpt = opt: builtins.hasAttr opt config.serviceConfig;
+ in lib.mkIf confinement.enable {
+   serviceConfig = {
+     # An empty directory from the store serves as the root of the chroot.
+     RootDirectory = pkgs.runCommand chrootName {} "mkdir \"$out\"";
+     TemporaryFileSystem = "/";
+     PrivateMounts = lib.mkDefault true;
+   } // lib.genAttrs apivfsFlags (_: apivfsDefault);
+
+   confinement.packages = let
+     # Commands of those Exec* settings that the service actually defines.
+     execPkgs = map (opt: config.serviceConfig.${opt})
+       (lib.filter hasOpt execOpts);
+     # fullUnit: the JSON-serialised unit definition becomes the only root.
+     unitJSON = builtins.toJSON toplevelConfig.systemd.units."${name}.service";
+     unitPkgs = if confinement.fullUnit then [ unitJSON ] else execPkgs;
+     binShPkgs = if confinement.binSh == null then []
+       else [ confinement.binSh ];
+   in unitPkgs ++ binShPkgs;
+ };
+ }));
+ };
+
+ # Reject serviceConfig flags that can't be combined with confinement; an unset flag counts as false.
+ config.assertions = let
+   confined = lib.filterAttrs (_: cfg: cfg.confinement.enable) config.systemd.services;
+   mkAssertion = name: cfg: optName: explanation: {
+     assertion = !(cfg.serviceConfig.${optName} or false);
+     message = "The 'serviceConfig' option '${optName}' for service '${name}'"
+       + " is enabled in conjunction with 'confinement.enable'${explanation}";
+   };
+ in lib.concatLists (lib.mapAttrsToList (name: cfg: [
+   (mkAssertion name cfg "RootDirectoryStartOnly" (", but right now systemd"
+     + " doesn't support restricting bind-mounts to 'ExecStart'."
+     + " Please either define a separate service or find a way to run"
+     + " commands other than ExecStart within the chroot."))
+   (mkAssertion name cfg "DynamicUser" (". Please create a dedicated user via"
+     + " the 'users.users' option instead as this combination is"
+     + " currently not supported."))
+ ]) confined);
+
+ # Emit, per confined service, a unit file fragment that bind-mounts its closure read-only.
+ config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let
+   rootPaths = let
+     contents = lib.concatStringsSep "\n" cfg.confinement.packages;
+   in pkgs.writeText "${mkPathSafeName name}-string-contexts.txt" contents;
+   chrootPaths = pkgs.runCommand "${mkPathSafeName name}-chroot-paths" {
+     closureInfo = pkgs.closureInfo { inherit rootPaths; };
+     serviceName = "${name}.service";
+     excludedPath = rootPaths; # the roots file itself is skipped in the loop below
+   } ''
+     mkdir -p "$out/lib/systemd/system"
+     serviceFile="$out/lib/systemd/system/$serviceName"
+     echo '[Service]' > "$serviceFile"
+
+     # /bin/sh is special here, because the option value could contain a
+     # symlink and we need to properly resolve it.
+     ${lib.optionalString (cfg.confinement.binSh != null) ''
+       binsh=${lib.escapeShellArg cfg.confinement.binSh}
+       realprog="$(readlink -e "$binsh")"
+       echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
+     ''}
+
+     # Read each store path verbatim (no backslash or whitespace mangling).
+     while IFS= read -r storePath; do
+       if [ -L "$storePath" ]; then
+         # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
+         # so let's just bind-mount the target to that location.
+         echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
+       elif [ "$storePath" != "$excludedPath" ]; then
+         echo "BindReadOnlyPaths=$storePath"
+       fi
+     done < "$closureInfo/store-paths" >> "$serviceFile"
+   '';
+ in lib.optional cfg.confinement.enable chrootPaths) config.systemd.services);
+}
diff --git a/nixos/modules/system/boot/systemd-lib.nix b/nixos/modules/system/boot/systemd-lib.nix
index 68a40377ee1..28ad4f121bb 100644
--- a/nixos/modules/system/boot/systemd-lib.nix
+++ b/nixos/modules/system/boot/systemd-lib.nix
@@ -9,12 +9,11 @@ in rec {
shellEscape = s: (replaceChars [ "\\" ] [ "\\\\" ] s);
+ mkPathSafeName = unitName: lib.replaceChars ["@" ":" "\\" "[" "]"] ["-" "-" "-" "" ""] unitName; # "@", ":" and "\" become "-"; "[" and "]" are dropped
+
makeUnit = name: unit:
- let
- pathSafeName = lib.replaceChars ["@" ":" "\\" "[" "]"] ["-" "-" "-" "" ""] name;
- in
if unit.enable then
- pkgs.runCommand "unit-${pathSafeName}"
+ pkgs.runCommand "unit-${mkPathSafeName name}"
{ preferLocalBuild = true;
allowSubstitutes = false;
inherit (unit) text;
@@ -24,7 +23,7 @@ in rec {
echo -n "$text" > $out/${shellEscape name}
''
else
- pkgs.runCommand "unit-${pathSafeName}-disabled"
+ pkgs.runCommand "unit-${mkPathSafeName name}-disabled"
{ preferLocalBuild = true;
allowSubstitutes = false;
}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 49bbe24fdc0..395dc22f968 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -221,6 +221,7 @@ in
switchTest = handleTest ./switch-test.nix {};
syncthing-relay = handleTest ./syncthing-relay.nix {};
systemd = handleTest ./systemd.nix {};
+ systemd-confinement = handleTest ./systemd-confinement.nix {};
taskserver = handleTest ./taskserver.nix {};
telegraf = handleTest ./telegraf.nix {};
tomcat = handleTest ./tomcat.nix {};
diff --git a/nixos/tests/systemd-confinement.nix b/nixos/tests/systemd-confinement.nix
new file mode 100644
index 00000000000..b7b10fb36aa
--- /dev/null
+++ b/nixos/tests/systemd-confinement.nix
@@ -0,0 +1,168 @@
+import ./make-test.nix {
+ name = "systemd-confinement";
+
+ machine = { pkgs, lib, ... }: let
+ testServer = pkgs.writeScript "testserver.sh" ''
+ #!${pkgs.stdenv.shell}
+ export PATH=${lib.escapeShellArg "${pkgs.coreutils}/bin"}
+ ${lib.escapeShellArg pkgs.stdenv.shell} 2>&1
+ echo "exit-status:$?"
+ '';
+ # testServer (above) runs a shell and then prints "exit-status:<n>"; chroot-exec (below) sends its arguments to /run/test<N>.sock (N read from /teststep), prints the reply minus its last line and exits with <n>, or 1 if none was found.
+ testClient = pkgs.writeScriptBin "chroot-exec" ''
+ #!${pkgs.stdenv.shell} -e
+ output="$(echo "$@" | nc -NU "/run/test$(< /teststep).sock")"
+ ret="$(echo "$output" | sed -nre '$s/^exit-status:([0-9]+)$/\1/p')"
+ echo "$output" | head -n -1
+ exit "''${ret:-1}"
+ '';
+ # Module for test step `num`: a socket-activated confined service running testServer, plus its ordered subtest.
+ mkTestStep = num: { description, config ? {}, testScript }: let
+   step = toString num;
+ in {
+   systemd.sockets."test${step}" = {
+     description = "Socket for Test Service ${step}";
+     wantedBy = [ "sockets.target" ];
+     socketConfig = { ListenStream = "/run/test${step}.sock"; Accept = true; };
+   };
+   systemd.services."test${step}@" = {
+     description = "Confined Test Service ${step}";
+     confinement = (config.confinement or {}) // { enable = true; };
+     serviceConfig = (config.serviceConfig or {}) // {
+       ExecStart = testServer;
+       StandardInput = "socket";
+     };
+   } // removeAttrs config [ "confinement" "serviceConfig" ];
+
+   __testSteps = lib.mkOrder num ''
+     subtest '${lib.escape ["\\" "'"] description}', sub {
+     $machine->succeed('echo ${step} > /teststep');
+     ${testScript}
+     };
+   '';
+ };
+
+ in {
+ imports = lib.imap1 mkTestStep [
+ { description = "chroot-only confinement";
+ config.confinement.mode = "chroot-only";
+ testScript = ''
+ $machine->succeed(
+ 'test "$(chroot-exec ls -1 / | paste -sd,)" = bin,nix',
+ 'test "$(chroot-exec id -u)" = 0',
+ 'chroot-exec chown 65534 /bin',
+ );
+ '';
+ }
+ { description = "full confinement with APIVFS";
+ testScript = ''
+ $machine->fail(
+ 'chroot-exec ls -l /etc',
+ 'chroot-exec ls -l /run',
+ 'chroot-exec chown 65534 /bin',
+ );
+ $machine->succeed(
+ 'test "$(chroot-exec id -u)" = 0',
+ 'chroot-exec chown 0 /bin',
+ );
+ '';
+ }
+ { description = "check existence of bind-mounted /etc";
+ config.serviceConfig.BindReadOnlyPaths = [ "/etc" ];
+ testScript = ''
+ $machine->succeed('test -n "$(chroot-exec cat /etc/passwd)"');
+ '';
+ }
+ { description = "check if User/Group really runs as non-root";
+ config.serviceConfig.User = "chroot-testuser";
+ config.serviceConfig.Group = "chroot-testgroup";
+ testScript = ''
+ $machine->succeed('chroot-exec ls -l /dev');
+ $machine->succeed('test "$(chroot-exec id -u)" != 0');
+ $machine->fail('chroot-exec touch /bin/test');
+ '';
+ }
+ (let
+ symlink = pkgs.runCommand "symlink" {
+ target = pkgs.writeText "symlink-target" "got me\n";
+ } "ln -s \"$target\" \"$out\"";
+ in {
+ description = "check if symlinks are properly bind-mounted";
+ config.confinement.packages = lib.singleton symlink;
+ testScript = ''
+ $machine->fail('chroot-exec test -e /etc');
+ $machine->succeed('chroot-exec cat ${symlink} >&2');
+ $machine->succeed('test "$(chroot-exec cat ${symlink})" = "got me"');
+ '';
+ })
+ { description = "check if StateDirectory works";
+ config.serviceConfig.User = "chroot-testuser";
+ config.serviceConfig.Group = "chroot-testgroup";
+ config.serviceConfig.StateDirectory = "testme";
+ testScript = ''
+ $machine->succeed('chroot-exec touch /tmp/canary');
+ $machine->succeed('chroot-exec "echo works > /var/lib/testme/foo"');
+ $machine->succeed('test "$(< /var/lib/testme/foo)" = works');
+ $machine->succeed('test ! -e /tmp/canary');
+ '';
+ }
+ { description = "check if /bin/sh works";
+ testScript = ''
+ $machine->succeed(
+ 'chroot-exec test -e /bin/sh',
+ 'test "$(chroot-exec \'/bin/sh -c "echo bar"\')" = bar',
+ );
+ '';
+ }
+ { description = "check if suppressing /bin/sh works";
+ config.confinement.binSh = null;
+ testScript = ''
+ $machine->succeed(
+ 'chroot-exec test ! -e /bin/sh',
+ 'test "$(chroot-exec \'/bin/sh -c "echo foo"\')" != foo',
+ );
+ '';
+ }
+ { description = "check if we can set /bin/sh to something different";
+ config.confinement.binSh = "${pkgs.hello}/bin/hello";
+ testScript = ''
+ $machine->succeed(
+ 'chroot-exec test -e /bin/sh',
+ 'test "$(chroot-exec /bin/sh -g foo)" = foo',
+ );
+ '';
+ }
+ { description = "check if only Exec* dependencies are included";
+ config.environment.FOOBAR = pkgs.writeText "foobar" "eek\n";
+ testScript = ''
+ $machine->succeed('test "$(chroot-exec \'cat "$FOOBAR"\')" != eek');
+ '';
+ }
+ { description = "check if all unit dependencies are included";
+ config.environment.FOOBAR = pkgs.writeText "foobar" "eek\n";
+ config.confinement.fullUnit = true;
+ testScript = ''
+ $machine->succeed('test "$(chroot-exec \'cat "$FOOBAR"\')" = eek');
+ '';
+ }
+ ];
+
+ options.__testSteps = lib.mkOption {
+ type = lib.types.lines;
+ description = "All of the test steps combined as a single script.";
+ };
+
+ config.environment.systemPackages = lib.singleton testClient;
+
+ config.users.groups.chroot-testgroup = {};
+ config.users.users.chroot-testuser = {
+ description = "Chroot Test User";
+ group = "chroot-testgroup";
+ };
+ };
+
+ testScript = { nodes, ... }: ''
+ $machine->waitForUnit('multi-user.target');
+ ${nodes.machine.config.__testSteps}
+ '';
+}