| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  | { config, lib, ... }: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | with lib; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   meta = { | 
					
						
							|  |  |  |     maintainers = [ maintainers.joachifm ]; | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   options = { | 
					
						
							|  |  |  |     security.allowUserNamespaces = mkOption { | 
					
						
							|  |  |  |       type = types.bool; | 
					
						
							|  |  |  |       default = true; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							| 
									
										
										
										
											2019-04-21 11:50:52 +02:00
										 |  |  |         Whether to allow creation of user namespaces. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The motivation for disabling user namespaces is the potential | 
					
						
							|  |  |  |         presence of code paths where the kernel's permission checking | 
					
						
							|  |  |  |         logic fails to account for namespacing, instead permitting a | 
					
						
							|  |  |  |         namespaced process to act outside the namespace with the same | 
					
						
							|  |  |  |         privileges as it would have inside it.  This is particularly | 
					
						
							| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  |         damaging in the common case of running as root within the namespace. | 
					
						
							| 
									
										
										
										
											2019-04-21 11:50:52 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         When user namespace creation is disallowed, attempting to create a | 
					
						
							|  |  |  |         user namespace fails with "no space left on device" (ENOSPC). | 
					
						
							|  |  |  |         root may re-enable user namespace creation at runtime. | 
					
						
							| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							| 
									
										
										
										
											2018-12-16 10:37:36 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-23 13:17:53 +03:00
										 |  |  |     security.unprivilegedUsernsClone = mkOption { | 
					
						
							|  |  |  |       type = types.bool; | 
					
						
							|  |  |  |       default = false; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							|  |  |  |         When disabled, unprivileged users will not be able to create new namespaces. | 
					
						
							|  |  |  |         By default unprivileged user namespaces are disabled. | 
					
						
							|  |  |  |         This option only works in a hardened profile. | 
					
						
							|  |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-16 10:37:36 +01:00
										 |  |  |     security.protectKernelImage = mkOption { | 
					
						
							|  |  |  |       type = types.bool; | 
					
						
							|  |  |  |       default = false; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							|  |  |  |         Whether to prevent replacing the running kernel image. | 
					
						
							|  |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-26 22:24:04 +01:00
										 |  |  |     security.allowSimultaneousMultithreading = mkOption { | 
					
						
							|  |  |  |       type = types.bool; | 
					
						
							|  |  |  |       default = true; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							|  |  |  |         Whether to allow SMT/hyperthreading.  Disabling SMT means that only | 
					
						
							|  |  |  |         physical CPU cores will be usable at runtime, potentially at | 
					
						
							|  |  |  |         significant performance cost. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The primary motivation for disabling SMT is to mitigate the risk of | 
					
						
							|  |  |  |         leaking data between threads running on the same CPU core (due to | 
					
						
							|  |  |  |         e.g., shared caches).  This attack vector is unproven. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Disabling SMT is a supplement to the L1 data cache flushing mitigation | 
					
						
							| 
									
										
										
										
											2019-07-19 15:49:37 +02:00
										 |  |  |         (see <xref linkend="opt-security.virtualisation.flushL1DataCache"/>) | 
					
						
							| 
									
										
										
										
											2018-12-26 22:24:04 +01:00
										 |  |  |         versus malicious VM guests (SMT could "bring back" previously flushed | 
					
						
							|  |  |  |         data). | 
					
						
							|  |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-30 02:24:56 +02:00
										 |  |  |     security.forcePageTableIsolation = mkOption { | 
					
						
							|  |  |  |       type = types.bool; | 
					
						
							|  |  |  |       default = false; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							|  |  |  |         Whether to force-enable the Page Table Isolation (PTI) Linux kernel | 
					
						
							|  |  |  |         feature even on CPU models that claim to be safe from Meltdown. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         This hardening feature is most beneficial to systems that run untrusted | 
					
						
							|  |  |  |         workloads that rely on address space isolation for security. | 
					
						
							|  |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-19 15:49:37 +02:00
										 |  |  |     security.virtualisation.flushL1DataCache = mkOption { | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  |       type = types.nullOr (types.enum [ "never" "cond" "always" ]); | 
					
						
							|  |  |  |       default = null; | 
					
						
							|  |  |  |       description = ''
 | 
					
						
							|  |  |  |         Whether the hypervisor should flush the L1 data cache before | 
					
						
							|  |  |  |         entering guests. | 
					
						
							| 
									
										
										
										
											2018-12-26 22:24:04 +01:00
										 |  |  |         See also <xref linkend="opt-security.allowSimultaneousMultithreading"/>. | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-05-13 09:15:17 +02:00
										 |  |  |         <variablelist> | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  |           <varlistentry> | 
					
						
							|  |  |  |             <term><literal>null</literal></term> | 
					
						
							|  |  |  |             <listitem><para>uses the kernel default</para></listitem> | 
					
						
							|  |  |  |           </varlistentry> | 
					
						
							|  |  |  |           <varlistentry> | 
					
						
							|  |  |  |             <term><literal>"never"</literal></term> | 
					
						
							|  |  |  |             <listitem><para>disables L1 data cache flushing entirely. | 
					
						
							|  |  |  |             May be appropriate if all guests are trusted.</para></listitem> | 
					
						
							|  |  |  |           </varlistentry> | 
					
						
							|  |  |  |           <varlistentry> | 
					
						
							|  |  |  |             <term><literal>"cond"</literal></term> | 
					
						
							|  |  |  |             <listitem><para>flushes L1 data cache only for pre-determined | 
					
						
							|  |  |  |             code paths.  May leak information about the host address space | 
					
						
							|  |  |  |             layout.</para></listitem> | 
					
						
							|  |  |  |           </varlistentry> | 
					
						
							|  |  |  |           <varlistentry> | 
					
						
							|  |  |  |             <term><literal>"always"</literal></term> | 
					
						
							|  |  |  |             <listitem><para>flushes L1 data cache every time the hypervisor | 
					
						
							|  |  |  |             enters the guest.  May incur significant performance cost. | 
					
						
							|  |  |  |             </para></listitem> | 
					
						
							|  |  |  |           </varlistentry> | 
					
						
							| 
									
										
										
										
											2019-05-13 09:15:17 +02:00
										 |  |  |         </variablelist> | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  |       '';
 | 
					
						
							|  |  |  |     }; | 
					
						
							| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-24 18:37:46 +01:00
										 |  |  |   config = mkMerge [ | 
					
						
							|  |  |  |     (mkIf (!config.security.allowUserNamespaces) { | 
					
						
							|  |  |  |       # Setting the number of allowed user namespaces to 0 effectively disables | 
					
						
							|  |  |  |       # the feature at runtime.  Note that root may raise the limit again | 
					
						
							|  |  |  |       # at any time. | 
					
						
							|  |  |  |       boot.kernel.sysctl."user.max_user_namespaces" = 0; | 
					
						
							| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-24 18:37:46 +01:00
										 |  |  |       assertions = [ | 
					
						
							|  |  |  |         { assertion = config.nix.useSandbox -> config.security.allowUserNamespaces; | 
					
						
							|  |  |  |           message = "`nix.useSandbox = true` conflicts with `!security.allowUserNamespaces`."; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |       ]; | 
					
						
							|  |  |  |     }) | 
					
						
							| 
									
										
										
										
											2018-12-16 10:37:36 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-23 13:17:53 +03:00
										 |  |  |     (mkIf config.security.unprivilegedUsernsClone { | 
					
						
							|  |  |  |       boot.kernel.sysctl."kernel.unprivileged_userns_clone" = mkDefault true; | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-16 10:37:36 +01:00
										 |  |  |     (mkIf config.security.protectKernelImage { | 
					
						
							|  |  |  |       # Disable hibernation (allows replacing the running kernel) | 
					
						
							|  |  |  |       boot.kernelParams = [ "nohibernate" ]; | 
					
						
							|  |  |  |       # Prevent replacing the running kernel image w/o reboot | 
					
						
							|  |  |  |       boot.kernel.sysctl."kernel.kexec_load_disabled" = mkDefault true; | 
					
						
							|  |  |  |     }) | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-26 22:24:04 +01:00
										 |  |  |     (mkIf (!config.security.allowSimultaneousMultithreading) { | 
					
						
							|  |  |  |       boot.kernelParams = [ "nosmt" ]; | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-30 02:24:56 +02:00
										 |  |  |     (mkIf config.security.forcePageTableIsolation { | 
					
						
							|  |  |  |       boot.kernelParams = [ "pti=on" ]; | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-19 15:49:37 +02:00
										 |  |  |     (mkIf (config.security.virtualisation.flushL1DataCache != null) { | 
					
						
							|  |  |  |       boot.kernelParams = [ "kvm-intel.vmentry_l1d_flush=${config.security.virtualisation.flushL1DataCache}" ]; | 
					
						
							| 
									
										
										
										
											2018-12-26 22:22:55 +01:00
										 |  |  |     }) | 
					
						
							| 
									
										
										
										
											2018-11-24 18:37:46 +01:00
										 |  |  |   ]; | 
					
						
							| 
									
										
										
										
											2018-10-15 00:39:26 +02:00
										 |  |  | } |