diff --git a/nixos/lib/make-zfs-image.nix b/nixos/lib/make-zfs-image.nix
new file mode 100644
index 00000000000..40648ca24d4
--- /dev/null
+++ b/nixos/lib/make-zfs-image.nix
@@ -0,0 +1,333 @@
+# Note: This is a private API, internal to NixOS. Its interface is subject
+# to change without notice.
+#
+# The result of this builder is two disk images:
+#
+# * `boot` - a small disk formatted with FAT to be used for /boot. FAT is
+#   chosen to support EFI.
+# * `root` - a larger disk with a zpool taking the entire disk.
+#
+# This two-disk approach is taken to satisfy ZFS's requirements for
+# autoexpand.
+#
+# # Why doesn't autoexpand work with ZFS in a partition?
+#
+# When ZFS owns the whole disk, it doesn't really use a partition: it has
+# a marker partition at the start and a marker partition at the end of
+# the disk.
+#
+# If ZFS is constrained to a partition, ZFS leaves expanding the partition
+# up to the user. Obviously, the user may not choose to do so.
+#
+# Once the user expands the partition, calling `zpool online -e` expands
+# the vdev to use the whole partition. This doesn't happen automatically,
+# presumably because zed doesn't get an event saying its partition grew,
+# whereas it can and does get an event saying the whole disk it is on is
+# now larger.
+{ lib
+, pkgs
+, # The NixOS configuration to be installed onto the disk image.
+  config
+
+, # The size of the FAT boot disk, in megabytes.
+  bootSize ? 1024
+
+, # The size of the root disk, in megabytes.
+  rootSize ? 2048
+
+, # The name of the ZFS pool
+  rootPoolName ? "tank"
+
+, # zpool properties
+  rootPoolProperties ? {
+    autoexpand = "on";
+  }
+, # pool-wide filesystem properties
+  rootPoolFilesystemProperties ? {
+    acltype = "posixacl";
+    atime = "off";
+    compression = "on";
+    mountpoint = "legacy";
+    xattr = "sa";
+  }
+
+, # datasets, with per-attribute options:
+  #   mount: (optional) mount point in the VM
+  #   properties: (optional) ZFS properties on the dataset, like filesystemProperties
+  # Notes:
+  #   1. datasets will be created from shorter to longer names as a simple topo-sort
+  #   2. you should define one dataset whose mount is `/`
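+  #
+  # For illustration only (names and values are hypothetical), a
+  # `datasets` argument like this one creates `tank/system/var` before
+  # `tank/system/root` (shorter names first), mounts `/` before `/var`
+  # (shorter mounts first), and sets a quota on the var dataset:
+  #
+  #   datasets = {
+  #     "tank/system/root".mount = "/";
+  #     "tank/system/var" = {
+  #       mount = "/var";
+  #       properties.quota = "10G";
+  #     };
+  #   };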
+  datasets ? { }
+
+, # The files and directories to be placed in the target file system.
+  # This is a list of attribute sets {source, target} where `source'
+  # is the file system object (regular file or directory) to be
+  # grafted in the file system at path `target'.
+  contents ? []
+
+, # The initial NixOS configuration file to be copied to
+  # /etc/nixos/configuration.nix. This configuration will be embedded
+  # inside a configuration which includes the described ZFS fileSystems.
+  configFile ? null
+
+, # Shell code executed after the VM has finished.
+  postVM ? ""
+
+, name ? "nixos-disk-image"
+
+, # Disk image format, one of qcow2, qcow2-compressed, vdi, vpc, raw.
+  format ? "raw"
+
+, # Include a copy of Nixpkgs in the disk image
+  includeChannel ? true
+}:
+let
+  formatOpt = if format == "qcow2-compressed" then "qcow2" else format;
+
+  compress = lib.optionalString (format == "qcow2-compressed") "-c";
+
+  filenameSuffix = "." + {
+    qcow2 = "qcow2";
+    vdi = "vdi";
+    vpc = "vhd";
+    raw = "img";
+  }.${formatOpt} or formatOpt;
+  bootFilename = "nixos.boot${filenameSuffix}";
+  rootFilename = "nixos.root${filenameSuffix}";
+
+  # FIXME: merge with channel.nix / make-channel.nix.
+  channelSources =
+    let
+      nixpkgs = lib.cleanSource pkgs.path;
+    in
+    pkgs.runCommand "nixos-${config.system.nixos.version}" {} ''
+      mkdir -p $out
+      cp -prd ${nixpkgs.outPath} $out/nixos
+      chmod -R u+w $out/nixos
+      if [ ! -e $out/nixos/nixpkgs ]; then
+        ln -s . $out/nixos/nixpkgs
+      fi
+      rm -rf $out/nixos/.git
+      echo -n ${config.system.nixos.versionSuffix} > $out/nixos/.version-suffix
+    '';
+
+  closureInfo = pkgs.closureInfo {
+    rootPaths = [ config.system.build.toplevel ]
+    ++ (lib.optional includeChannel channelSources);
+  };
+
+  modulesTree = pkgs.aggregateModules
+    (with config.boot.kernelPackages; [ kernel zfs ]);
+
+  tools = lib.makeBinPath (
+    with pkgs; [
+      config.system.build.nixos-enter
+      config.system.build.nixos-install
+      dosfstools
+      e2fsprogs
+      gptfdisk
+      nix
+      parted
+      utillinux
+      zfs
+    ]
+  );
+
+  hasDefinedMount = disk: ((disk.mount or null) != null);
+
+  stringifyProperties = prefix: properties: lib.concatStringsSep " \\\n" (
+    lib.mapAttrsToList
+      (
+        property: value: "${prefix} ${lib.escapeShellArg property}=${lib.escapeShellArg value}"
+      )
+      properties
+  );
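+
+  # For illustration: stringifyProperties "-o" { ashift = 12; autoexpand = "on"; }
+  # evaluates (attribute names in sorted order) to something like:
+  #
+  #   -o 'ashift'='12' \
+  #   -o 'autoexpand'='on'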
< "$filesystemsPath" + echo "'''" + ) > $out + + nixpkgs-fmt $out + ''; + + mergedConfig = + if configFile == null + then fileSystemsCfgFile + else + pkgs.runCommand "configuration.nix" { + buildInputs = with pkgs; [ nixpkgs-fmt ]; + } + '' + ( + echo '{ imports = [' + printf "(%s)\n" "$(cat ${fileSystemsCfgFile})"; + printf "(%s)\n" "$(cat ${configFile})"; + echo ']; }' + ) > $out + + nixpkgs-fmt $out + ''; + + image = ( + pkgs.vmTools.override { + rootModules = + [ "zfs" "9p" "9pnet_virtio" "virtio_pci" "virtio_blk" ] ++ + (pkgs.lib.optional (pkgs.stdenv.isi686 || pkgs.stdenv.isx86_64) "rtc_cmos"); + kernel = modulesTree; + } + ).runInLinuxVM ( + pkgs.runCommand name + { + QEMU_OPTS = "-drive file=$bootDiskImage,if=virtio,cache=unsafe,werror=report" + + " -drive file=$rootDiskImage,if=virtio,cache=unsafe,werror=report"; + preVM = '' + PATH=$PATH:${pkgs.qemu_kvm}/bin + mkdir $out + bootDiskImage=boot.raw + qemu-img create -f raw $bootDiskImage ${toString bootSize}M + + rootDiskImage=root.raw + qemu-img create -f raw $rootDiskImage ${toString rootSize}M + ''; + + postVM = '' + ${if formatOpt == "raw" then '' + mv $bootDiskImage $out/${bootFilename} + mv $rootDiskImage $out/${rootFilename} + '' else '' + ${pkgs.qemu}/bin/qemu-img convert -f raw -O ${formatOpt} ${compress} $bootDiskImage $out/${bootFilename} + ${pkgs.qemu}/bin/qemu-img convert -f raw -O ${formatOpt} ${compress} $rootDiskImage $out/${rootFilename} + ''} + bootDiskImage=$out/${bootFilename} + rootDiskImage=$out/${rootFilename} + set -x + ${postVM} + ''; + } '' + export PATH=${tools}:$PATH + set -x + + cp -sv /dev/vda /dev/sda + cp -sv /dev/vda /dev/xvda + + parted --script /dev/vda -- \ + mklabel gpt \ + mkpart no-fs 1MiB 2MiB \ + set 1 bios_grub on \ + align-check optimal 1 \ + mkpart ESP fat32 2MiB -1MiB \ + align-check optimal 2 \ + print + + sfdisk --dump /dev/vda + + + zpool create \ + ${stringifyProperties " -o" rootPoolProperties} \ + ${stringifyProperties " -O" rootPoolFilesystemProperties} \ + ${rootPoolName} /dev/vdb + parted --script /dev/vdb -- print + + ${createDatasets} + ${mountDatasets} + + mkdir -p /mnt/boot + mkfs.vfat -n ESP /dev/vda2 + mount /dev/vda2 /mnt/boot + + mount + + # Install a configuration.nix + mkdir -p /mnt/etc/nixos + # `cat` so it is mutable on the fs + cat ${mergedConfig} > /mnt/etc/nixos/configuration.nix + + export NIX_STATE_DIR=$TMPDIR/state + nix-store --load-db < ${closureInfo}/registration + + nixos-install \ + --root /mnt \ + --no-root-passwd \ + --system ${config.system.build.toplevel} \ + --substituters "" \ + ${lib.optionalString includeChannel ''--channel ${channelSources}''} + + df -h + + umount /mnt/boot + ${unmountDatasets} + + zpool export ${rootPoolName} + '' + ); +in +image diff --git a/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix b/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix new file mode 100644 index 00000000000..32dd96a7cb7 --- /dev/null +++ b/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix @@ -0,0 +1,12 @@ +{ + imports = [ ./amazon-image.nix ]; + ec2.zfs = { + enable = true; + datasets = { + "tank/system/root".mount = "/"; + "tank/system/var".mount = "/var"; + "tank/local/nix".mount = "/nix"; + "tank/user/home".mount = "/home"; + }; + }; +} diff --git a/nixos/maintainers/scripts/ec2/amazon-image.nix b/nixos/maintainers/scripts/ec2/amazon-image.nix index 677aff4421e..6942b58f236 100644 --- a/nixos/maintainers/scripts/ec2/amazon-image.nix +++ b/nixos/maintainers/scripts/ec2/amazon-image.nix @@ -4,6 +4,7 @@ with lib; let cfg = config.amazonImage; + in { 
+
+  mergedConfig =
+    if configFile == null
+    then fileSystemsCfgFile
+    else
+      pkgs.runCommand "configuration.nix" {
+        buildInputs = with pkgs; [ nixpkgs-fmt ];
+      }
+        ''
+          (
+            echo '{ imports = ['
+            printf "(%s)\n" "$(cat ${fileSystemsCfgFile})";
+            printf "(%s)\n" "$(cat ${configFile})";
+            echo ']; }'
+          ) > $out
+
+          nixpkgs-fmt $out
+        '';
+
+  image = (
+    pkgs.vmTools.override {
+      rootModules =
+        [ "zfs" "9p" "9pnet_virtio" "virtio_pci" "virtio_blk" ] ++
+        (pkgs.lib.optional (pkgs.stdenv.isi686 || pkgs.stdenv.isx86_64) "rtc_cmos");
+      kernel = modulesTree;
+    }
+  ).runInLinuxVM (
+    pkgs.runCommand name
+      {
+        QEMU_OPTS = "-drive file=$bootDiskImage,if=virtio,cache=unsafe,werror=report"
+          + " -drive file=$rootDiskImage,if=virtio,cache=unsafe,werror=report";
+        preVM = ''
+          PATH=$PATH:${pkgs.qemu_kvm}/bin
+          mkdir $out
+          bootDiskImage=boot.raw
+          qemu-img create -f raw $bootDiskImage ${toString bootSize}M
+
+          rootDiskImage=root.raw
+          qemu-img create -f raw $rootDiskImage ${toString rootSize}M
+        '';
+
+        postVM = ''
+          ${if formatOpt == "raw" then ''
+            mv $bootDiskImage $out/${bootFilename}
+            mv $rootDiskImage $out/${rootFilename}
+          '' else ''
+            ${pkgs.qemu}/bin/qemu-img convert -f raw -O ${formatOpt} ${compress} $bootDiskImage $out/${bootFilename}
+            ${pkgs.qemu}/bin/qemu-img convert -f raw -O ${formatOpt} ${compress} $rootDiskImage $out/${rootFilename}
+          ''}
+          bootDiskImage=$out/${bootFilename}
+          rootDiskImage=$out/${rootFilename}
+          set -x
+          ${postVM}
+        '';
+      } ''
+      export PATH=${tools}:$PATH
+      set -x
+
+      cp -sv /dev/vda /dev/sda
+      cp -sv /dev/vda /dev/xvda
+
+      parted --script /dev/vda -- \
+        mklabel gpt \
+        mkpart no-fs 1MiB 2MiB \
+        set 1 bios_grub on \
+        align-check optimal 1 \
+        mkpart ESP fat32 2MiB -1MiB \
+        align-check optimal 2 \
+        print
+
+      sfdisk --dump /dev/vda
+
+      zpool create \
+        ${stringifyProperties "  -o" rootPoolProperties} \
+        ${stringifyProperties "  -O" rootPoolFilesystemProperties} \
+        ${rootPoolName} /dev/vdb
+      parted --script /dev/vdb -- print
+
+      ${createDatasets}
+      ${mountDatasets}
+
+      mkdir -p /mnt/boot
+      mkfs.vfat -n ESP /dev/vda2
+      mount /dev/vda2 /mnt/boot
+
+      mount
+
+      # Install a configuration.nix
+      mkdir -p /mnt/etc/nixos
+      # `cat` so it is mutable on the fs
+      cat ${mergedConfig} > /mnt/etc/nixos/configuration.nix
+
+      export NIX_STATE_DIR=$TMPDIR/state
+      nix-store --load-db < ${closureInfo}/registration
+
+      nixos-install \
+        --root /mnt \
+        --no-root-passwd \
+        --system ${config.system.build.toplevel} \
+        --substituters "" \
+        ${lib.optionalString includeChannel ''--channel ${channelSources}''}
+
+      df -h
+
+      umount /mnt/boot
+      ${unmountDatasets}
+
+      zpool export ${rootPoolName}
+    ''
+  );
+in
+image
diff --git a/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix b/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix
new file mode 100644
index 00000000000..32dd96a7cb7
--- /dev/null
+++ b/nixos/maintainers/scripts/ec2/amazon-image-zfs.nix
@@ -0,0 +1,12 @@
+{
+  imports = [ ./amazon-image.nix ];
+  ec2.zfs = {
+    enable = true;
+    datasets = {
+      "tank/system/root".mount = "/";
+      "tank/system/var".mount = "/var";
+      "tank/local/nix".mount = "/nix";
+      "tank/user/home".mount = "/home";
+    };
+  };
+}
diff --git a/nixos/maintainers/scripts/ec2/amazon-image.nix b/nixos/maintainers/scripts/ec2/amazon-image.nix
index 677aff4421e..6942b58f236 100644
--- a/nixos/maintainers/scripts/ec2/amazon-image.nix
+++ b/nixos/maintainers/scripts/ec2/amazon-image.nix
@@ -4,6 +4,7 @@ with lib;
 
 let
   cfg = config.amazonImage;
+
 in {
 
   imports = [ ../../../modules/virtualisation/amazon-image.nix ];
@@ -53,15 +54,7 @@
     };
   };
 
-  config.system.build.amazonImage = import ../../../lib/make-disk-image.nix {
-    inherit lib config;
-    inherit (cfg) contents format name;
-    pkgs = import ../../../.. { inherit (pkgs) system; }; # ensure we use the regular qemu-kvm package
-    partitionTableType = if config.ec2.efi then "efi"
-      else if config.ec2.hvm then "legacy+gpt"
-      else "none";
-    diskSize = cfg.sizeMB;
-    fsType = "ext4";
+  config.system.build.amazonImage = let
     configFile = pkgs.writeText "configuration.nix"
       ''
         { modulesPath, ... }: {
@@ -72,24 +65,96 @@ in {
          ${optionalString config.ec2.efi ''
            ec2.efi = true;
          ''}
+          ${optionalString config.ec2.zfs.enable ''
+            ec2.zfs.enable = true;
+            networking.hostId = "${config.networking.hostId}";
+          ''}
        }
      '';
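+
+    # With ec2.zfs.enable set, the optionalString above renders plain
+    # settings into the generated configuration.nix, along the lines of
+    # (hostId value hypothetical):
+    #
+    #   ec2.zfs.enable = true;
+    #   networking.hostId = "deadbeef";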
 
-    postVM = ''
-      extension=''${diskImage##*.}
-      friendlyName=$out/${cfg.name}.$extension
-      mv "$diskImage" "$friendlyName"
-      diskImage=$friendlyName
-
-      mkdir -p $out/nix-support
-      echo "file ${cfg.format} $diskImage" >> $out/nix-support/hydra-build-products
+    zfsBuilder = import ../../../lib/make-zfs-image.nix {
+      inherit lib config configFile;
+      inherit (cfg) contents format name;
+      pkgs = import ../../../.. { inherit (pkgs) system; }; # ensure we use the regular qemu-kvm package
 
-      ${pkgs.jq}/bin/jq -n \
-        --arg label ${lib.escapeShellArg config.system.nixos.label} \
-        --arg system ${lib.escapeShellArg pkgs.stdenv.hostPlatform.system} \
-        --arg logical_bytes "$(${pkgs.qemu}/bin/qemu-img info --output json "$diskImage" | ${pkgs.jq}/bin/jq '."virtual-size"')" \
-        --arg file "$diskImage" \
-        '$ARGS.named' \
-        > $out/nix-support/image-info.json
-    '';
-  };
+      includeChannel = true;
+
+      bootSize = 1000; # 1G is the minimum EBS volume
+
+      rootSize = cfg.sizeMB;
+      rootPoolProperties = {
+        ashift = 12;
+        autoexpand = "on";
+      };
+
+      datasets = config.ec2.zfs.datasets;
+
+      postVM = ''
+        extension=''${rootDiskImage##*.}
+        friendlyName=$out/${cfg.name}
+        rootDisk="$friendlyName.root.$extension"
+        bootDisk="$friendlyName.boot.$extension"
+        mv "$rootDiskImage" "$rootDisk"
+        mv "$bootDiskImage" "$bootDisk"
+
+        mkdir -p $out/nix-support
+        echo "file ${cfg.format} $bootDisk" >> $out/nix-support/hydra-build-products
+        echo "file ${cfg.format} $rootDisk" >> $out/nix-support/hydra-build-products
+
+        ${pkgs.jq}/bin/jq -n \
+          --arg system_label ${lib.escapeShellArg config.system.nixos.label} \
+          --arg system ${lib.escapeShellArg pkgs.stdenv.hostPlatform.system} \
+          --arg root_logical_bytes "$(${pkgs.qemu}/bin/qemu-img info --output json "$rootDisk" | ${pkgs.jq}/bin/jq '."virtual-size"')" \
+          --arg boot_logical_bytes "$(${pkgs.qemu}/bin/qemu-img info --output json "$bootDisk" | ${pkgs.jq}/bin/jq '."virtual-size"')" \
+          --arg root "$rootDisk" \
+          --arg boot "$bootDisk" \
+          '{}
+          | .label = $system_label
+          | .system = $system
+          | .disks.boot.logical_bytes = $boot_logical_bytes
+          | .disks.boot.file = $boot
+          | .disks.root.logical_bytes = $root_logical_bytes
+          | .disks.root.file = $root
+          ' > $out/nix-support/image-info.json
+      '';
+    };
+
+    extBuilder = import ../../../lib/make-disk-image.nix {
+      inherit lib config configFile;
+
+      inherit (cfg) contents format name;
+      pkgs = import ../../../.. { inherit (pkgs) system; }; # ensure we use the regular qemu-kvm package
+
+      fsType = "ext4";
+      partitionTableType = if config.ec2.efi then "efi"
+        else if config.ec2.hvm then "legacy+gpt"
+        else "none";
+
+      diskSize = cfg.sizeMB;
+
+      postVM = ''
+        extension=''${diskImage##*.}
+        friendlyName=$out/${cfg.name}.$extension
+        mv "$diskImage" "$friendlyName"
+        diskImage=$friendlyName
+
+        mkdir -p $out/nix-support
+        echo "file ${cfg.format} $diskImage" >> $out/nix-support/hydra-build-products
+
+        ${pkgs.jq}/bin/jq -n \
+          --arg system_label ${lib.escapeShellArg config.system.nixos.label} \
+          --arg system ${lib.escapeShellArg pkgs.stdenv.hostPlatform.system} \
+          --arg logical_bytes "$(${pkgs.qemu}/bin/qemu-img info --output json "$diskImage" | ${pkgs.jq}/bin/jq '."virtual-size"')" \
+          --arg file "$diskImage" \
+          '{}
+          | .label = $system_label
+          | .system = $system
+          | .logical_bytes = $logical_bytes
+          | .file = $file
+          | .disks.root.logical_bytes = $logical_bytes
+          | .disks.root.file = $file
+          ' > $out/nix-support/image-info.json
+      '';
+    };
+  in if config.ec2.zfs.enable then zfsBuilder else extBuilder;
 }
diff --git a/nixos/modules/virtualisation/amazon-image.nix b/nixos/modules/virtualisation/amazon-image.nix
index 26297a7d0f1..65a3fc9c90e 100644
--- a/nixos/modules/virtualisation/amazon-image.nix
+++ b/nixos/modules/virtualisation/amazon-image.nix
@@ -33,17 +33,23 @@ in
 
   boot.growPartition = cfg.hvm;
 
-  fileSystems."/" = {
+  fileSystems."/" = mkIf (!cfg.zfs.enable) {
     device = "/dev/disk/by-label/nixos";
     fsType = "ext4";
     autoResize = true;
   };
 
-  fileSystems."/boot" = mkIf cfg.efi {
+  fileSystems."/boot" = mkIf (cfg.efi || cfg.zfs.enable) {
+    # The ZFS image uses a partition labeled ESP whether or not we're
+    # booting with EFI.
     device = "/dev/disk/by-label/ESP";
     fsType = "vfat";
   };
 
+  services.zfs.expandOnBoot = mkIf cfg.zfs.enable "all";
+
+  boot.zfs.devNodes = mkIf cfg.zfs.enable "/dev/";
+
   boot.extraModulePackages = [
     config.boot.kernelPackages.ena
   ];
diff --git a/nixos/modules/virtualisation/amazon-options.nix b/nixos/modules/virtualisation/amazon-options.nix
index 2e807131e93..698edcd835a 100644
--- a/nixos/modules/virtualisation/amazon-options.nix
+++ b/nixos/modules/virtualisation/amazon-options.nix
@@ -1,7 +1,46 @@
 { config, lib, pkgs, ... }:
 
-{
+let
+  inherit (lib) types;
+in {
   options = {
     ec2 = {
+      zfs = {
+        enable = lib.mkOption {
+          default = false;
+          internal = true;
+          description = ''
+            Whether the EC2 instance uses a ZFS root.
+          '';
+        };
+
+        datasets = lib.mkOption {
+          description = ''
+            Datasets to create under the `tank` zpool.
+
+            **NOTE:** This option is used only at image creation time, and
+            does not attempt to declaratively create or manage datasets
+            on an existing system.
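+
+            For example (illustrative only):
+
+                ec2.zfs.datasets."tank/user/home".mount = "/home";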
+          '';
+
+          default = {};
+
+          type = types.attrsOf (types.submodule {
+            options = {
+              mount = lib.mkOption {
+                description = "Where to mount this dataset.";
+                type = types.nullOr types.str;
+                default = null;
+              };
+
+              properties = lib.mkOption {
+                description = "Properties to set on this dataset.";
+                type = types.attrsOf types.str;
+                default = {};
+              };
+            };
+          });
+        };
+      };
       hvm = lib.mkOption {
         default = lib.versionAtLeast config.system.stateVersion "17.03";
         internal = true;
@@ -18,4 +57,17 @@
       };
     };
   };
+
+  config = lib.mkIf config.ec2.zfs.enable {
+    networking.hostId = lib.mkDefault "00000000";
+
+    fileSystems = let
+      mountable = lib.filterAttrs (_: value: ((value.mount or null) != null)) config.ec2.zfs.datasets;
+    in lib.mapAttrs'
+      (dataset: opts: lib.nameValuePair opts.mount {
+        device = dataset;
+        fsType = "zfs";
+      })
+      mountable;
+  };
 }
diff --git a/nixos/release.nix b/nixos/release.nix
index 37dd3ff6055..6836fcdcfcc 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -217,6 +217,20 @@ in rec {
     }).config.system.build.amazonImage)
   );
 
+  amazonImageZfs = forMatchingSystems [ "x86_64-linux" "aarch64-linux" ] (system:
+
+    with import ./.. { inherit system; };
+
+    hydraJob ((import lib/eval-config.nix {
+      inherit system;
+      modules =
+        [ configuration
+          versionModule
+          ./maintainers/scripts/ec2/amazon-image-zfs.nix
+        ];
+    }).config.system.build.amazonImage)
+
+  );
 
   # Test job for https://github.com/NixOS/nixpkgs/issues/121354 to test