From 7df55477fd04b1443051f767fd77ea42cb50ed59 Mon Sep 17 00:00:00 2001
From: Charles Duffy <charles@dyfis.net>
Date: Sun, 14 Oct 2018 00:40:37 -0500
Subject: [PATCH 1/3] bees: init at 0.6.1

Introduce an extent-layer (as opposed to the existing file-level)
deduplication system for btrfs. This provides a means of finding
similarities within non-identical files, when they contain identical,
aligned blocks.
---
 .../filesystems/bees/bees-service-wrapper     | 243 ++++++++++++++++++
 pkgs/tools/filesystems/bees/default.nix       |  74 ++++++
 pkgs/top-level/all-packages.nix               |   2 +
 3 files changed, 319 insertions(+)
 create mode 100755 pkgs/tools/filesystems/bees/bees-service-wrapper
 create mode 100644 pkgs/tools/filesystems/bees/default.nix

diff --git a/pkgs/tools/filesystems/bees/bees-service-wrapper b/pkgs/tools/filesystems/bees/bees-service-wrapper
new file mode 100755
index 00000000000..8ef46afc18f
--- /dev/null
+++ b/pkgs/tools/filesystems/bees/bees-service-wrapper
@@ -0,0 +1,243 @@
+#!@bash@/bin/bash
+PATH=@bash@/bin:@coreutils@/bin:@utillinux@/bin:@btrfsProgs@/bin:$PATH
+beesd_bin=@bees@/lib/bees/bees
+# PLEASE KEEP NIX-ISMS ABOVE THIS LINE TO EASE UPSTREAM MERGE
+#!/usr/bin/env bash
+
+shopt -s extglob
+
+# Upstream wrapper requires UUID to be used for configuration.
+
+# However, when declaratively describing a host, we may not know its UUID, and
+# shouldn't need to persist something that will differ between hosts built from
+# the same configuration template.
+
+# Thus, for using bees from NixOS, we have our own wrapper, which supports not
+# just UUID but any specification permitted by findmnt
+
+[[ $bees_debug ]] && { PS4=':${BASH_SOURCE##*/}:$LINENO+'; set -x; }
+
+# Print usage text to stderr and exit with status 1.
+usage() {
+  cat >&2 <<EOF
+Usage: ${BASH_SOURCE##*/} run|cleanup config-name|fsSpec [idxSizeMB=...] [verbosity=...] [workDir=...] [-- daemon-options...]
+
+  fsSpec should be in a format recognized by findmnt. Alternately,
+  "config-name" may refer to a file that exists in ${bees_config_dir:-/etc/bees}
+  with a .conf extension; if that file does not specify UUID, findmnt will be
+  used in addition.
+
+  Note that while config files may presently use shell arithmetic, use of this
+  functionality is not encouraged going forward: Setting ''idxSizeMB=4096'' is
+  preferred over ''DB_SIZE=\$((1024*1024*1024*4))'' or ''DB_SIZE=\$(( AL16M * 256 ))'',
+  although both of these are presently supported.
+
+  If fsSpec contains a /, it is assumed to be a mount point to be looked up by
+  findmnt, not a config file name.
+
+  daemon-options are passed directly through to the daemon on startup, as
+  documented at https://github.com/Zygo/bees/blob/master/docs/options.md.
+EOF
+  exit 1
+}
+
+# Print the arguments as a single message on stderr and exit with status 1.
+die() { echo "$*" >&2; exit 1; }
+
+# Canonical configuration names that a config file may set directly
+allConfigNames=( blockdev fsSpec home idxSize idxSizeMB mntDir runDir status verbosity workDir )
+
+# Alternate names for configuration values; "bees_" will always be prepended
+declare -A altConfigNames=(
+  # from original bees wrapper
+  [BEESHOME]=home
+  [BEESSTATUS]=status
+  [MNT_DIR]=mntDir
+  [UUID]=uuid
+  [WORK_DIR]=runDir
+  [DB_SIZE]=idxSize
+)
+
+# legacy bees config files can be arbitrary shell scripts, so we need to actually evaluate them
+#
+# Evaluates the file given as $1 in a restricted (-r) bash with PATH=/var/empty,
+# sending anything it prints to stderr, then emits each known variable that is
+# non-empty as a NUL-terminated name=value pair on stdout.
+sandboxedConfigFileEval() {
+  bash_exe=$(type -P bash) || exit
+  PATH=/var/empty ENV='' BASH_ENV='' AL128K="$((128*1024))" AL16M="$((16*1024*1024))" "$bash_exe" -r ${bees_debug+-x} \
+    -c 'eval "$(</dev/stdin)" >&2; for var; do [[ ${!var} ]] && printf "%q=%s\\0" "$var" "${!var}"; done' \
+    "${!altConfigNames[@]}" "${allConfigNames[@]}" \
+    <"$1"
+}
+
+# Read configuration from the file named by $1, passing each name=value pair it
+# yields to set_option. Returns 1 without reading anything if the file does not
+# exist or is empty.
+readConfigFileIfExists() {
+  local line
+  [[ -s $1 ]] || return 1
+  while IFS= read -r -d '' line; do
+    line=${line%%+([[:space:]])"#"*}
+    [[ $line ]] || continue
+    [[ $line = *=* ]] || {
+      printf 'WARNING: Config file line not recognized: %q\n' "$line" >&2
+      continue
+    }
+    set_option "$line"
+  done < <(sandboxedConfigFileEval "$1")
+}
+
+# Store one "key=value" setting ($1) in the global variable bees_<key>, first
+# translating legacy key names through altConfigNames.
+set_option() {
+  local k v
+  k="${1%%=*}" v="${1#*=}"
+  [[ ${altConfigNames[$k]} ]] && k=${altConfigNames[$k]}
+  printf -v "bees_$k" %s "$v"
+}
+
+# Matches a bare UUID written as 8-4-4-4-12 hexadecimal digits.
+uuid_re='^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$'
+
+# Shared code for setting configuration used by other operations.
+#
+# Takes the config-name or fsSpec as $1. Values already set from key=value
+# pairs on the command line are kept unless a config file read here sets the
+# same name; remaining bees_* global variables are filled in with defaults.
+_setup() {
+  declare fstype
+  bees_fsSpec=$1; shift
+
+  # Look for file-based configuration, additional to honoring configuration on the command line
+  bees_config_dir="${bees_config_dir:-/etc/bees}"
+  if [[ $bees_fsSpec =~ $uuid_re ]]; then
+    bees_uuid=$bees_fsSpec
+    # If our spec looks like a bare UUID, and no config file exists in the new
+    # format, fall back to legacy config file search mechanism (grep; ewww).
+    if ! readConfigFileIfExists "$bees_config_dir/UUID=$bees_fsSpec.conf"; then
+      # Legacy approach to finding a config file: Grep for a *.conf file
+      # containing the UUID within its text. Permitting spaces around the "="
+      # appears to be a bug, but is retained for compatibility with the
+      # original upstream script.
+      allConfFiles=( "$bees_config_dir"/*.conf )
+      # Without nullglob, a glob matching nothing is left as the literal
+      # pattern, so also check that the first entry actually exists.
+      if (( ${#allConfFiles[@]} )) && [[ -e ${allConfFiles[0]} ]]; then
+        # in read or readarray with -d '', the NUL terminating the empty string is used as delimiter character.
+        readarray -d '' -t matchingConfFiles < <(grep -E -l -Z "^[^#]*UUID[[:space:]]*=[[:space:]]*\"?$bees_fsSpec" "${allConfFiles[@]}")
+      else
+        matchingConfFiles=( )
+      fi
+      if (( ${#matchingConfFiles[@]} == 1 )); then
+        # Exactly one configuration file exists in our target directory with a reference to the UUID given.
+        bees_config_file=${matchingConfFiles[0]}
+        readConfigFileIfExists "$bees_config_file"
+        echo "NOTE: Please consider renaming $bees_config_file to $bees_config_dir/UUID=$bees_fsSpec.conf" >&2
+        echo " ...and passing UUID=$bees_fsSpec on startup." >&2
+      elif (( ${#matchingConfFiles[@]} > 1 )); then
+        # The legacy wrapper would silently use the first file and ignore
+        # others, but... no.
+        echo "ERROR: Passed a bare UUID, but multiple configuration files match it:" >&2
+        printf ' - %q\n' "${matchingConfFiles[@]}" >&2
+        die "Unable to continue."
+      fi
+    fi
+  else
+    # For a non-UUID fsSpec that is not a path, look only for a config file
+    # exactly matching its text.
+    #
+    # (Passing a mount point as a fsSpec is only supported with the new
+    # wrapper; all key=value pairs can be passed on the command line in this
+    # mode, so config file support is not needed).
+    [[ $bees_fsSpec = */* ]] || readConfigFileIfExists "$bees_config_dir/$bees_fsSpec.conf"
+  fi
+
+  [[ $bees_uuid ]] || {
+    # if bees_uuid is not in our .conf file, look it up with findmnt
+    read -r bees_uuid fstype < <(findmnt -n -o uuid,fstype "$bees_fsSpec") && [[ $fstype ]] || exit
+    [[ $fstype = btrfs ]] || die "Device type is $fstype, not btrfs"
+  }
+
+  [[ $bees_uuid = */* ]] || readConfigFileIfExists "$bees_config_dir/UUID=$bees_uuid.conf"
+
+  # Honor any values read from config files above; otherwise, set defaults.
+  bees_workDir="${bees_workDir:-.beeshome}"
+  bees_runDir="${bees_runDir:-/run/bees}"
+  bees_mntDir="${bees_mntDir:-$bees_runDir/mnt/$bees_uuid}"
+  bees_home="${bees_home:-$bees_mntDir/$bees_workDir}"
+  bees_status="${bees_status:-${bees_runDir}/$bees_uuid.status}"
+  bees_verbosity="${bees_verbosity:-6}"
+  bees_idxSizeMB="${bees_idxSizeMB:-1024}"
+  bees_idxSize=${bees_idxSize:-"$(( bees_idxSizeMB * 1024 * 1024 ))"}
+  bees_blockdev=${bees_blockdev:-"/dev/disk/by-uuid/$bees_uuid"}
+
+  [[ -b $bees_blockdev ]] || die "Block device $bees_blockdev missing"
+  (( bees_idxSize % (16 * 1024 * 1024) == 0 )) || die "DB size must be divisible by 16MB"
+}
+
+# Mount the filesystem's root subvolume, make sure the bees home subvolume and
+# a hash table file of the configured size exist, then exec the daemon. Any
+# arguments after the first are passed through to the daemon.
+do_run() {
+  local db old_db_size new_db_size
+
+  _setup "$1"; shift
+  mkdir -p -- "$bees_mntDir" || exit
+
+  # subvol id 5 is reserved for the root subvolume of a btrfs filesystem.
+  mountpoint -q "$bees_mntDir" || mount -osubvolid=5 -- "$bees_blockdev" "$bees_mntDir" || exit
+  if [[ -d $bees_home ]]; then
+    btrfs subvolume show "$bees_home" >/dev/null 2>&1 || die "$bees_home exists but is not a subvolume"
+  else
+    btrfs subvolume create "$bees_home" || exit
+    sync # workaround for Zygo/bees#93
+  fi
+  db=$bees_home/beeshash.dat
+  touch -- "$db" || exit
+
+  old_db_size=$(stat -c %s -- "$db")
+  new_db_size=$bees_idxSize
+
+  if (( old_db_size != new_db_size )); then
+    rm -f -- "$bees_home"/beescrawl."$bees_uuid".dat
+    truncate -s "$new_db_size" -- "$db" || exit
+  fi
+  chmod 700 -- "$bees_home"
+
+  # BEESSTATUS and BEESHOME are the only variables handled by the legacy
+  # wrapper for which getenv() is called in C code.
+  BEESSTATUS=$bees_status BEESHOME=$bees_home exec "${beesd_bin:-/lib/bees/bees}" \
+    --verbose "$bees_verbosity" \
+    "$@" "$bees_mntDir" || exit
+}
+
+# Lazily unmount the mount point used by do_run; succeeds if nothing is mounted.
+do_cleanup() {
+  _setup "$1"; shift
+  if mountpoint -q "$bees_mntDir"; then
+    umount -l -- "$bees_mntDir" || exit
+  fi
+}
+
+(( $# >= 2 )) || usage
+declare -f "do_$1" >/dev/null 2>&1 || usage
+mode=$1; shift # must be a do_* function; currently "run" or "cleanup"
+
+declare -a args=( "$1" ); shift # pass first argument (config-name|fsSpec) through literally
+
+# parse other arguments as key=value pairs, or pass them through literally if they do not match that form.
+# similarly, any option after "--" will be passed through literally.
+while (( $# )); do
+  if [[ $1 = *=* ]]; then
+    set_option "$1"
+  elif [[ $1 = -- ]]; then
+    shift
+    args+=( "$@" )
+    break
+  else
+    args+=( "$1" )
+  fi
+  shift
+done
+
+"do_$mode" "${args[@]}"
diff --git a/pkgs/tools/filesystems/bees/default.nix b/pkgs/tools/filesystems/bees/default.nix
new file mode 100644
index 00000000000..c43962cb075
--- /dev/null
+++ b/pkgs/tools/filesystems/bees/default.nix
@@ -0,0 +1,74 @@
+{ stdenv, runCommand, makeWrapper, fetchFromGitHub, bash, btrfs-progs, coreutils, pythonPackages, utillinux }:
+
+let
+
+  version = "0.6.1";
+  sha256 = "0h7idclmhyp14mq6786x7f2237vqpn70gyi88ik4g70xl84yfgyh";
+
+  bees = stdenv.mkDerivation rec {
+    name = "bees-${version}";
+    inherit version;
+
+    src = fetchFromGitHub {
+      owner = "Zygo";
+      repo = "bees";
+      rev = "v${version}";
+      inherit sha256;
+    };
+
+    buildInputs = [
+      btrfs-progs # for btrfs/ioctl.h
+      utillinux # for uuid.h
+    ];
+
+    nativeBuildInputs = [
+      pythonPackages.markdown # documentation build
+    ];
+
+    # Shadow git with a shell function that answers "git describe" with our
+    # version (presumably how upstream's build determines its version string).
+    preBuild = ''
+      git() { if [[ $1 = describe ]]; then echo ${version}; else command git "$@"; fi; }
+      export -f git
+    '';
+
+    postBuild = ''
+      unset -f git
+    '';
+
+    # NOTE(review): ETC_PREFIX is set both here and in makeFlags with different
+    # values; presumably this one is the path used while compiling and the
+    # makeFlags one is the install destination -- confirm before changing.
+    buildFlags = [
+      "ETC_PREFIX=/var/run/bees/configs"
+    ];
+
+    makeFlags = [
+      "SHELL=bash"
+      "PREFIX=$(out)"
+      "ETC_PREFIX=$(out)/etc"
+      "BEES_VERSION=${version}"
+      "SYSTEMD_SYSTEM_UNIT_DIR=$(out)/etc/systemd/system"
+    ];
+
+    meta = with stdenv.lib; {
+      homepage = "https://github.com/Zygo/bees";
+      description = "Block-oriented BTRFS deduplication service";
+      license = licenses.gpl3;
+      platforms = platforms.linux;
+      maintainers = with maintainers; [ chaduffy ];
+      longDescription = "Best-Effort Extent-Same: bees finds not just identical files, but also identical extents within files that differ";
+    };
+  };
+
+in
+
+runCommand "bees-service-${version}" {
+  inherit bash bees coreutils utillinux;
+  btrfsProgs = btrfs-progs; # needs to be a valid shell variable name
+} ''
+  mkdir -p -- "$out/bin"
+  substituteAll ${./bees-service-wrapper} "$out"/bin/bees-service-wrapper
+  chmod +x "$out"/bin/bees-service-wrapper
+  ln -s ${bees}/bin/beesd "$out"/bin/beesd
+''
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 09f566ccd7f..c7fe0a978c6 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -21893,6 +21893,8 @@ with pkgs;
 
   beep = callPackage ../misc/beep { };
 
+  bees = callPackage ../tools/filesystems/bees { };
+
   blackbird = callPackage ../misc/themes/blackbird { };
 
   bootil = callPackage ../development/libraries/bootil { };

From 86db2f394cdc8d96f84c50f92da0e5bb96843b52 Mon Sep 17 00:00:00 2001
From: Charles Duffy <charles@dyfis.net>
Date: Sun, 14 Oct 2018 10:58:56 -0500
Subject: [PATCH 2/3] nixos/modules: services.bees init

---
 nixos/modules/module-list.nix        |   1 +
 nixos/modules/services/misc/bees.nix | 126 +++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 nixos/modules/services/misc/bees.nix

diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index ae5084ca2a2..5074976fafa 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -336,6 +336,7 @@
   ./services/misc/apache-kafka.nix
   ./services/misc/autofs.nix
   ./services/misc/autorandr.nix
+  ./services/misc/bees.nix
   ./services/misc/bepasty.nix
   ./services/misc/canto-daemon.nix
   ./services/misc/calibre-server.nix
diff --git a/nixos/modules/services/misc/bees.nix b/nixos/modules/services/misc/bees.nix
new file mode 100644
index 00000000000..b0ed2d5c286
--- /dev/null
+++ b/nixos/modules/services/misc/bees.nix
@@ -0,0 +1,126 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+
+  cfg = config.services.beesd;
+
+  # syslog severity keywords and their numeric levels
+  logLevels = { emerg = 0; alert = 1; crit = 2; err = 3; warning = 4; notice = 5; info = 6; debug = 7; };
+
+  # Options accepted for each entry of services.beesd.filesystems
+  fsOptions = with types; {
+    options.spec = mkOption {
+      type = str;
+      description = ''
+        Description of how to identify the filesystem to be deduplicated by this
+        instance of bees. Note that deduplication crosses subvolumes; one must
+        not configure multiple instances for subvolumes of the same filesystem
+        (or block devices which are part of the same filesystem), but only for
+        completely independent btrfs filesystems.
+        </para>
+        <para>
+        This must be in a format usable by findmnt; that could be a key=value
+        pair, or a bare path to a mount point.
+      '';
+      example = "LABEL=MyBulkDataDrive";
+    };
+    options.hashTableSizeMB = mkOption {
+      type = types.addCheck types.int (n: mod n 16 == 0);
+      default = 1024; # 1GB; default from upstream beesd script
+      description = ''
+        Hash table size in MB; must be a multiple of 16.
+        </para>
+        <para>
+        A larger ratio of index size to storage size means smaller blocks of
+        duplicate content are recognized.
+        </para>
+        <para>
+        If you have 1TB of data, a 4GB hash table (which is to say, a value of
+        4096) will permit 4KB extents (the smallest possible size) to be
+        recognized, whereas a value of 1024 -- creating a 1GB hash table --
+        will recognize only aligned duplicate blocks of 16KB.
+      '';
+    };
+    options.verbosity = mkOption {
+      type = types.enum (attrNames logLevels ++ attrValues logLevels);
+      apply = v: if isString v then logLevels.${v} else v;
+      default = "info";
+      description = "Log verbosity (syslog keyword/level).";
+    };
+    options.workDir = mkOption {
+      type = str;
+      default = ".beeshome";
+      description = ''
+        Name (relative to the root of the filesystem) of the subvolume where
+        the hash table will be stored.
+      '';
+    };
+    options.extraOptions = mkOption {
+      type = listOf str;
+      default = [];
+      description = ''
+        Extra command-line options passed to the daemon. See upstream bees documentation.
+      '';
+      example = literalExample ''
+        [ "--thread-count" "4" ]
+      '';
+    };
+  };
+
+in {
+
+  options.services.beesd = {
+    filesystems = mkOption {
+      type = with types; attrsOf (submodule fsOptions);
+      description = "BTRFS filesystems to run block-level deduplication on.";
+      default = { };
+      example = literalExample ''
+        {
+          root = {
+            spec = "LABEL=root";
+            hashTableSizeMB = 2048;
+            verbosity = "crit";
+            extraOptions = [ "--loadavg-target" "5.0" ];
+          };
+        }
+      '';
+    };
+  };
+  config = {
+    # Generate one beesd@<name> unit per entry of services.beesd.filesystems.
+    systemd.services = mapAttrs' (name: fs: nameValuePair "beesd@${name}" {
+      description = "Block-level BTRFS deduplication for %i";
+      after = [ "sysinit.target" ];
+
+      serviceConfig = let
+        configOpts = [
+          fs.spec
+          "verbosity=${toString fs.verbosity}"
+          "idxSizeMB=${toString fs.hashTableSizeMB}"
+          "workDir=${fs.workDir}"
+        ];
+        configOptsStr = escapeShellArgs configOpts;
+      in {
+        # Values from https://github.com/Zygo/bees/blob/v0.6.1/scripts/beesd%40.service.in
+        ExecStart = "${pkgs.bees}/bin/bees-service-wrapper run ${configOptsStr} -- --no-timestamps ${escapeShellArgs fs.extraOptions}";
+        ExecStopPost = "${pkgs.bees}/bin/bees-service-wrapper cleanup ${configOptsStr}";
+        CPUAccounting = true;
+        CPUWeight = 12;
+        IOSchedulingClass = "idle";
+        IOSchedulingPriority = 7;
+        IOWeight = 10;
+        KillMode = "control-group";
+        KillSignal = "SIGTERM";
+        MemoryAccounting = true;
+        Nice = 19;
+        Restart = "on-abnormal";
+        StartupCPUWeight = 25;
+        StartupIOWeight = 25;
+        SyslogIdentifier = "bees"; # would otherwise be "bees-service-wrapper"
+      };
+      wantedBy = ["multi-user.target"];
+    }) cfg.filesystems;
+  };
+}

From f50bfe267a312515d88e86c12ae002c4feefcc1f Mon Sep 17 00:00:00 2001
From: Charles Duffy <charles@dyfis.net>
Date: Mon, 26 Nov 2018 13:47:58 -0600
Subject: [PATCH 3/3] nixos.tests.bees: init

---
 nixos/tests/bees.nix | 59 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 nixos/tests/bees.nix

diff --git a/nixos/tests/bees.nix b/nixos/tests/bees.nix
new file mode 100644
index 00000000000..6f68c2f834f
--- /dev/null
+++ b/nixos/tests/bees.nix
@@ -0,0 +1,59 @@
+import ./make-test.nix ({ lib, ... }:
+{
+  name = "bees";
+
+  machine = { config, pkgs, ... }: {
+    boot.initrd.postDeviceCommands = ''
+      ${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux1 /dev/vdb
+      ${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux2 /dev/vdc
+    '';
+    virtualisation.emptyDiskImages = [ 4096 4096 ];
+    fileSystems = lib.mkVMOverride {
+      "/aux1" = { # filesystem configured to be deduplicated
+        device = "/dev/disk/by-label/aux1";
+        fsType = "btrfs";
+      };
+      "/aux2" = { # filesystem not configured to be deduplicated
+        device = "/dev/disk/by-label/aux2";
+        fsType = "btrfs";
+      };
+    };
+    services.beesd.filesystems = {
+      aux1 = {
+        spec = "LABEL=aux1";
+        hashTableSizeMB = 16;
+        verbosity = "debug";
+      };
+    };
+  };
+
+  testScript =
+  let
+    # Shell snippet that runs content up to maxTests times, sleeping sleepTime
+    # between attempts; exits 0 on the first success and 1 if none succeed.
+    withRetry = content: maxTests: sleepTime: ''
+      max_tests=${lib.escapeShellArg maxTests}; sleep_time=${lib.escapeShellArg sleepTime}; for ((i=0; i<max_tests; i++)); do ${content} && exit 0; sleep "$sleep_time"; done; exit 1;
+    '';
+    # Shell test that succeeds only when no row after the header of the
+    # "btrfs fi du" output for loc/dedup-me-{1,2} has 0 in its third column.
+    someContentIsShared = loc: ''[[ $(btrfs fi du -s --raw ${lib.escapeShellArg loc}/dedup-me-{1,2} | awk 'BEGIN { count=0; } NR>1 && $3 == 0 { count++ } END { print count }') -eq 0 ]]'';
+  in ''
+    # shut down the instance started by systemd at boot, so we can test our test procedure
+    $machine->succeed("systemctl stop beesd\@aux1.service");
+
+    $machine->succeed("dd if=/dev/urandom of=/aux1/dedup-me-1 bs=1M count=8");
+    $machine->succeed("cp --reflink=never /aux1/dedup-me-1 /aux1/dedup-me-2");
+    $machine->succeed("cp --reflink=never /aux1/* /aux2/");
+    $machine->succeed("sync");
+    $machine->fail(q(${someContentIsShared "/aux1"}));
+    $machine->fail(q(${someContentIsShared "/aux2"}));
+    $machine->succeed("systemctl start beesd\@aux1.service");
+
+    # assert that "Set Shared" column is nonzero
+    $machine->succeed(q(${withRetry (someContentIsShared "/aux1") 20 2}));
+    $machine->fail(q(${someContentIsShared "/aux2"}));
+
+    # assert that 16MB hash table size requested was honored
+    $machine->succeed(q([[ $(stat -c %s /aux1/.beeshome/beeshash.dat) = $(( 16 * 1024 * 1024)) ]]));
+  '';
+})