Merge pull request #48423 from charles-dyfis-net/bees
bees: init at 0.6.1; nixos/modules: services.bees init
This commit is contained in:
commit
4afae70e2b
|
@ -340,6 +340,7 @@
|
|||
./services/misc/apache-kafka.nix
|
||||
./services/misc/autofs.nix
|
||||
./services/misc/autorandr.nix
|
||||
./services/misc/bees.nix
|
||||
./services/misc/bepasty.nix
|
||||
./services/misc/canto-daemon.nix
|
||||
./services/misc/calibre-server.nix
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
|
||||
cfg = config.services.beesd;
|
||||
|
||||
logLevels = { emerg = 0; alert = 1; crit = 2; err = 3; warning = 4; notice = 5; info = 6; debug = 7; };
|
||||
|
||||
fsOptions = with types; {
|
||||
options.spec = mkOption {
|
||||
type = str;
|
||||
description = ''
|
||||
Description of how to identify the filesystem to be duplicated by this
|
||||
instance of bees. Note that deduplication crosses subvolumes; one must
|
||||
not configure multiple instances for subvolumes of the same filesystem
|
||||
(or block devices which are part of the same filesystem), but only for
|
||||
completely independent btrfs filesystems.
|
||||
</para>
|
||||
<para>
|
||||
This must be in a format usable by findmnt; that could be a key=value
|
||||
pair, or a bare path to a mount point.
|
||||
'';
|
||||
example = "LABEL=MyBulkDataDrive";
|
||||
};
|
||||
options.hashTableSizeMB = mkOption {
|
||||
type = types.addCheck types.int (n: mod n 16 == 0);
|
||||
default = 1024; # 1GB; default from upstream beesd script
|
||||
description = ''
|
||||
Hash table size in MB; must be a multiple of 16.
|
||||
</para>
|
||||
<para>
|
||||
A larger ratio of index size to storage size means smaller blocks of
|
||||
duplicate content are recognized.
|
||||
</para>
|
||||
<para>
|
||||
If you have 1TB of data, a 4GB hash table (which is to say, a value of
|
||||
4096) will permit 4KB extents (the smallest possible size) to be
|
||||
recognized, whereas a value of 1024 -- creating a 1GB hash table --
|
||||
will recognize only aligned duplicate blocks of 16KB.
|
||||
'';
|
||||
};
|
||||
options.verbosity = mkOption {
|
||||
type = types.enum (attrNames logLevels ++ attrValues logLevels);
|
||||
apply = v: if isString v then logLevels.${v} else v;
|
||||
default = "info";
|
||||
description = "Log verbosity (syslog keyword/level).";
|
||||
};
|
||||
options.workDir = mkOption {
|
||||
type = str;
|
||||
default = ".beeshome";
|
||||
description = ''
|
||||
Name (relative to the root of the filesystem) of the subvolume where
|
||||
the hash table will be stored.
|
||||
'';
|
||||
};
|
||||
options.extraOptions = mkOption {
|
||||
type = listOf str;
|
||||
default = [];
|
||||
description = ''
|
||||
Extra command-line options passed to the daemon. See upstream bees documentation.
|
||||
'';
|
||||
example = literalExample ''
|
||||
[ "--thread-count" "4" ]
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
in {
|
||||
|
||||
options.services.beesd = {
|
||||
filesystems = mkOption {
|
||||
type = with types; attrsOf (submodule fsOptions);
|
||||
description = "BTRFS filesystems to run block-level deduplication on.";
|
||||
default = { };
|
||||
example = literalExample ''
|
||||
{
|
||||
root = {
|
||||
spec = "LABEL=root";
|
||||
hashTableSizeMB = 2048;
|
||||
verbosity = "crit";
|
||||
extraOptions = [ "--loadavg-target" "5.0" ];
|
||||
};
|
||||
}
|
||||
'';
|
||||
};
|
||||
};
|
||||
config = {
|
||||
systemd.services = mapAttrs' (name: fs: nameValuePair "beesd@${name}" {
|
||||
description = "Block-level BTRFS deduplication for %i";
|
||||
after = [ "sysinit.target" ];
|
||||
|
||||
serviceConfig = let
|
||||
configOpts = [
|
||||
fs.spec
|
||||
"verbosity=${toString fs.verbosity}"
|
||||
"idxSizeMB=${toString fs.hashTableSizeMB}"
|
||||
"workDir=${fs.workDir}"
|
||||
];
|
||||
configOptsStr = escapeShellArgs configOpts;
|
||||
in {
|
||||
# Values from https://github.com/Zygo/bees/blob/v0.6.1/scripts/beesd%40.service.in
|
||||
ExecStart = "${pkgs.bees}/bin/bees-service-wrapper run ${configOptsStr} -- --no-timestamps ${escapeShellArgs fs.extraOptions}";
|
||||
ExecStopPost = "${pkgs.bees}/bin/bees-service-wrapper cleanup ${configOptsStr}";
|
||||
CPUAccounting = true;
|
||||
CPUWeight = 12;
|
||||
IOSchedulingClass = "idle";
|
||||
IOSchedulingPriority = 7;
|
||||
IOWeight = 10;
|
||||
KillMode = "control-group";
|
||||
KillSignal = "SIGTERM";
|
||||
MemoryAccounting = true;
|
||||
Nice = 19;
|
||||
Restart = "on-abnormal";
|
||||
StartupCPUWeight = 25;
|
||||
StartupIOWeight = 25;
|
||||
SyslogIdentifier = "bees"; # would otherwise be "bees-service-wrapper"
|
||||
};
|
||||
wantedBy = ["multi-user.target"];
|
||||
}) cfg.filesystems;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
import ./make-test.nix ({ lib, ... }:
|
||||
{
|
||||
name = "bees";
|
||||
|
||||
machine = { config, pkgs, ... }: {
|
||||
boot.initrd.postDeviceCommands = ''
|
||||
${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux1 /dev/vdb
|
||||
${pkgs.btrfs-progs}/bin/mkfs.btrfs -f -L aux2 /dev/vdc
|
||||
'';
|
||||
virtualisation.emptyDiskImages = [ 4096 4096 ];
|
||||
fileSystems = lib.mkVMOverride {
|
||||
"/aux1" = { # filesystem configured to be deduplicated
|
||||
device = "/dev/disk/by-label/aux1";
|
||||
fsType = "btrfs";
|
||||
};
|
||||
"/aux2" = { # filesystem not configured to be deduplicated
|
||||
device = "/dev/disk/by-label/aux2";
|
||||
fsType = "btrfs";
|
||||
};
|
||||
};
|
||||
services.beesd.filesystems = {
|
||||
aux1 = {
|
||||
spec = "LABEL=aux1";
|
||||
hashTableSizeMB = 16;
|
||||
verbosity = "debug";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
testScript =
|
||||
let
|
||||
withRetry = content: maxTests: sleepTime: ''
|
||||
max_tests=${lib.escapeShellArg maxTests}; sleep_time=${lib.escapeShellArg sleepTime}; for ((i=0; i<max_tests; i++)); do ${content} && exit 0; sleep "$sleep_time"; done; exit 1;
|
||||
'';
|
||||
someContentIsShared = loc: ''[[ $(btrfs fi du -s --raw ${lib.escapeShellArg loc}/dedup-me-{1,2} | awk 'BEGIN { count=0; } NR>1 && $3 == 0 { count++ } END { print count }') -eq 0 ]]'';
|
||||
in ''
|
||||
# shut down the instance started by systemd at boot, so we can test our test procedure
|
||||
$machine->succeed("systemctl stop beesd\@aux1.service");
|
||||
|
||||
$machine->succeed("dd if=/dev/urandom of=/aux1/dedup-me-1 bs=1M count=8");
|
||||
$machine->succeed("cp --reflink=never /aux1/dedup-me-1 /aux1/dedup-me-2");
|
||||
$machine->succeed("cp --reflink=never /aux1/* /aux2/");
|
||||
$machine->succeed("sync");
|
||||
$machine->fail(q(${someContentIsShared "/aux1"}));
|
||||
$machine->fail(q(${someContentIsShared "/aux2"}));
|
||||
$machine->succeed("systemctl start beesd\@aux1.service");
|
||||
|
||||
# assert that "Set Shared" column is nonzero
|
||||
$machine->succeed(q(${withRetry (someContentIsShared "/aux1") 20 2}));
|
||||
$machine->fail(q(${someContentIsShared "/aux2"}));
|
||||
|
||||
# assert that 16MB hash table size requested was honored
|
||||
$machine->succeed(q([[ $(stat -c %s /aux1/.beeshome/beeshash.dat) = $(( 16 * 1024 * 1024)) ]]))
|
||||
'';
|
||||
})
|
|
@ -0,0 +1,223 @@
|
|||
#!@bash@/bin/bash
|
||||
PATH=@bash@/bin:@coreutils@/bin:@utillinux@/bin:@btrfsProgs@/bin:$PATH
|
||||
beesd_bin=@bees@/lib/bees/bees
|
||||
# PLEASE KEEP NIX-ISMS ABOVE THIS LINE TO EASE UPSTREAM MERGE
|
||||
#!/usr/bin/env bash
|
||||
|
||||
shopt -s extglob
|
||||
|
||||
# Upstream wrapper requires UUID to be used for configuration.
|
||||
|
||||
# However, when declaratively describing a host, we may not know its UUID, and
|
||||
# shouldn't need to persist something that will differ between hosts built from
|
||||
# the same configuration template.
|
||||
|
||||
# Thus, for using bees from NixOS, we have our own wrapper, which supports not
|
||||
# just UUID but any specification permitted by findmnt
|
||||
|
||||
[[ $bees_debug ]] && { PS4=':${BASH_SOURCE##*/}:$LINENO+'; set -x; }
|
||||
|
||||
usage() {
|
||||
cat >&2 <<EOF
|
||||
Usage: ${BASH_SOURCE##*/} run|cleanup config-name|fsSpec [idxSizeMB=...] [verbosity=...] [workDir=...] [-- daemon-options...]
|
||||
|
||||
fsSpec should be in a format recognized by findmnt. Alternately,
|
||||
"config-name" may refer to a file that exists in ${bees_config_dir:-/etc/bees}
|
||||
with a .conf extension; if that file does not specify UUID, findmnt will be
|
||||
used in addition.
|
||||
|
||||
Note that while config files may presently use shell arithmetic, use of this
|
||||
functionality is not encouraged going forward: Setting ''idxSizeMB=4096'' is
|
||||
preferred over ''DB_SIZE=$((1024*1024*1024*4))'' or ''DB_SIZE=$(( AL16M * 256 ))'',
|
||||
although both of these are presently supported.
|
||||
|
||||
If fsSpec contains a /, it assumed to be a mount point to be looked up by
|
||||
findmnt, not a config file name.
|
||||
|
||||
daemon-options are passed directly through to the daemon on startup, as
|
||||
documented at https://github.com/Zygo/bees/blob/master/docs/options.md.
|
||||
EOF
|
||||
exit 1
|
||||
}
|
||||
|
||||
die() { echo "$*" >&2; exit 1; }
|
||||
|
||||
allConfigNames=( blockdev fsSpec home idxSize idxSizeMB mntDir runDir status verbosity workDir )
|
||||
|
||||
# Alternate names for configuration values; "bees_" will always be prepended
|
||||
declare -A altConfigNames=(
|
||||
# from original bees wrapper
|
||||
[BEESHOME]=home
|
||||
[BEESSTATUS]=status
|
||||
[MNT_DIR]=mntDir
|
||||
[UUID]=uuid
|
||||
[WORK_DIR]=runDir
|
||||
[DB_SIZE]=idxSize
|
||||
)
|
||||
|
||||
# legacy bees config files can be arbitrary shell scripts, so we need to actually evaluate them
|
||||
sandboxedConfigFileEval() {
|
||||
bash_exe=$(type -P bash) || exit
|
||||
PATH=/var/empty ENV='' BASH_ENV='' AL128K="$((128*1024))" AL16M="$((16*1024*1024))" "$bash_exe" -r ${bees_debug+-x} \
|
||||
-c 'eval "$(</dev/stdin)" >&2; for var; do [[ ${!var} ]] && printf "%q=%s\\0" "$var" "${!var}"; done' \
|
||||
"${!altConfigNames[@]}" "${allConfigNames[@]}" \
|
||||
<"$1"
|
||||
}
|
||||
|
||||
readConfigFileIfExists() {
|
||||
local line
|
||||
[[ -s $1 ]] || return 1
|
||||
while IFS= read -r -d '' line; do
|
||||
line=${line%%+([[:space:]])"#"*}
|
||||
[[ $line ]] || continue
|
||||
[[ $line = *=* ]] || {
|
||||
printf 'WARNING: Config file line not recognized: %q\n' "$line" >&2
|
||||
continue
|
||||
}
|
||||
set_option "$line"
|
||||
done < <(sandboxedConfigFileEval "$1")
|
||||
}
|
||||
|
||||
set_option() {
|
||||
local k v
|
||||
k="${1%%=*}" v="${1#*=}"
|
||||
[[ ${altConfigNames[$k]} ]] && k=${altConfigNames[$k]}
|
||||
printf -v "bees_$k" %s "$v"
|
||||
}
|
||||
|
||||
uuid_re='^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$'
|
||||
|
||||
# Shared code for setting configuration used by other operations.
|
||||
#
|
||||
# Reads from global associative array "opts" containing options passed in as
|
||||
# key=value pairs on the command line, looks for config-file overrides, and
|
||||
# sets individual global variables.
|
||||
_setup() {
|
||||
declare fstype
|
||||
bees_fsSpec=$1; shift
|
||||
|
||||
# Look for file-based configuration, additional to honoring configuration on the command line
|
||||
bees_config_dir="${bees_config_dir:-/etc/bees}"
|
||||
if [[ $bees_fsSpec =~ $uuid_re ]]; then
|
||||
bees_uuid=$bees_fsSpec
|
||||
# If our spec looks like a bare UUID, and no config file exists in the new
|
||||
# format, fall back to legacy config file search mechanism (grep; ewww).
|
||||
if ! readConfigFileIfExists "$bees_config_dir/UUID=$bees_fsSpec.conf"; then
|
||||
# Legacy approach to finding a config file: Grep for a *.conf file
|
||||
# containing the UUID within its text. Permitting spaces around the "="
|
||||
# appears to be a bug, but is retained for compatibility with the
|
||||
# original upstream script.
|
||||
allConfFiles=( "$bees_config_dir"/*.conf )
|
||||
if (( ${#allConfFiles[@]} )); then
|
||||
# in read or readarray with -d '', the NUL terminating the empty string is used as delimiter character.
|
||||
readarray -d '' -t matchingConfFiles < <(grep -E -l -Z "^[^#]*UUID[[:space:]]*=[[:space:]]*" "${allConfFiles[@]}")
|
||||
else
|
||||
matchingConfFiles=( )
|
||||
fi
|
||||
if (( ${#matchingConfFiles[@]} == 1 )); then
|
||||
# Exactly one configuration file exists in our target directory with a reference to the UUID given.
|
||||
bees_config_file=${matchingConfFiles[0]}
|
||||
readConfigFileIfExists "$bees_config_file"
|
||||
echo "NOTE: Please consider renaming $bees_config_file to $bees_config_dir/UUID=$bees_fsSpec" >&2
|
||||
echo " ...and passing UUID=$bees_fsSpec on startup." >&2
|
||||
elif (( ${#matchingConfFiles[@]} > 1 )); then
|
||||
# The legacy wrapper would silently use the first file and ignore
|
||||
# others, but... no.
|
||||
echo "ERROR: Passed a bare UUID, but multiple configuration files match it:" >&2
|
||||
printf ' - %q\n' "${matchingConfFiles[@]}" >&2
|
||||
die "Unable to continue."
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# For a non-UUID fsSpec that is not a path, look only for a config file
|
||||
# exactly matching its text.
|
||||
#
|
||||
# (Passing a mount point as a fsSpec is only supported with the new
|
||||
# wrapper; all key=value pairs can be passed on the command line in this
|
||||
# mode, so config file support is not needed).
|
||||
[[ $bees_fsSpec = */* ]] || readConfigFileIfExists "$bees_config_dir/$bees_fsSpec.conf"
|
||||
fi
|
||||
|
||||
[[ $bees_uuid ]] || {
|
||||
# if bees_uuid is not in our .conf file, look it up with findmnt
|
||||
read -r bees_uuid fstype < <(findmnt -n -o uuid,fstype "$bees_fsSpec") && [[ $fstype ]] || exit
|
||||
[[ $fstype = btrfs ]] || die "Device type is $fstype, not btrfs"
|
||||
}
|
||||
|
||||
[[ $bees_uuid = */* ]] || readConfigFileIfExists "$bees_config_dir/UUID=$bees_uuid.conf"
|
||||
|
||||
# Honor any values read from config files above; otherwise, set defaults.
|
||||
bees_workDir="${bees_workDir:-.beeshome}"
|
||||
bees_runDir="${bees_runDir:-/run/bees}"
|
||||
bees_mntDir="${bees_mntDir:-$bees_runDir/mnt/$bees_uuid}"
|
||||
bees_home="${bees_home:-$bees_mntDir/$bees_workDir}"
|
||||
bees_status="${bees_status:-${bees_runDir}/$bees_uuid.status}"
|
||||
bees_verbosity="${bees_verbosity:-6}"
|
||||
bees_idxSizeMB="${bees_idxSizeMB:-1024}"
|
||||
bees_idxSize=${bees_idxSize:-"$(( bees_idxSizeMB * 1024 * 1024 ))"}
|
||||
bees_blockdev=${bees_blockdev:-"/dev/disk/by-uuid/$bees_uuid"}
|
||||
|
||||
[[ -b $bees_blockdev ]] || die "Block device $bees_blockdev missing"
|
||||
(( bees_idxSize % (16 * 1024 * 1024) == 0 )) || die "DB size must be divisible by 16MB"
|
||||
}
|
||||
|
||||
do_run() {
|
||||
local db old_db_size
|
||||
|
||||
_setup "$1"; shift
|
||||
mkdir -p -- "$bees_mntDir" || exit
|
||||
|
||||
# subvol id 5 is reserved for the root subvolume of a btrfs filesystem.
|
||||
mountpoint -q "$bees_mntDir" || mount -osubvolid=5 -- "$bees_blockdev" "$bees_mntDir" || exit
|
||||
if [[ -d $bees_home ]]; then
|
||||
btrfs subvolume show "$bees_home" >/dev/null 2>&1 || die "$bees_home exists but is not a subvolume"
|
||||
else
|
||||
btrfs subvolume create "$bees_home" || exit
|
||||
sync # workaround for Zygo/bees#93
|
||||
fi
|
||||
db=$bees_home/beeshash.dat
|
||||
touch -- "$db"
|
||||
|
||||
old_db_size=$(stat -c %s -- "$db")
|
||||
new_db_size=$bees_idxSize
|
||||
|
||||
if (( old_db_size != new_db_size )); then
|
||||
rm -f -- "$bees_home"/beescrawl."$bees_uuid".dat
|
||||
truncate -s "$new_db_size" -- "$db" || exit
|
||||
fi
|
||||
chmod 700 -- "$bees_home"
|
||||
|
||||
# BEESSTATUS and BEESHOME are the only variables handled by the legacy
|
||||
# wrapper for which getenv() is called in C code.
|
||||
BEESSTATUS=$bees_status BEESHOME=$bees_home exec "${beesd_bin:-/lib/bees/bees}" \
|
||||
--verbose "$bees_verbosity" \
|
||||
"$@" "$bees_mntDir" || exit
|
||||
}
|
||||
|
||||
do_cleanup() {
|
||||
_setup "$1"; shift
|
||||
mountpoint -q "$bees_mntDir" && umount -l -- "$bees_mntDir" || exit
|
||||
}
|
||||
|
||||
(( $# >= 2 )) || usage
|
||||
declare -f "do_$1" >/dev/null 2>&1 || usage
|
||||
mode=$1; shift # must be a do_* function; currently "run" or "cleanup"
|
||||
|
||||
declare -a args=( "$1" ); shift # pass first argument (config-name|fsSpec) through literally
|
||||
|
||||
# parse other arguments as key=value pairs, or pass them through literally if they do not match that form.
|
||||
# similarly, any option after "--" will be passed through literally.
|
||||
while (( $# )); do
|
||||
if [[ $1 = *=* ]]; then
|
||||
set_option "$1"
|
||||
elif [[ $1 = -- ]]; then
|
||||
shift
|
||||
args+=( "$@" )
|
||||
break
|
||||
else
|
||||
args+=( "$1" )
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
"do_$mode" "${args[@]}"
|
|
@ -0,0 +1,69 @@
|
|||
{ stdenv, runCommand, makeWrapper, fetchFromGitHub, bash, btrfs-progs, coreutils, pythonPackages, utillinux }:
|
||||
|
||||
let
|
||||
|
||||
version = "0.6.1";
|
||||
sha256 = "0h7idclmhyp14mq6786x7f2237vqpn70gyi88ik4g70xl84yfgyh";
|
||||
|
||||
bees = stdenv.mkDerivation rec {
|
||||
name = "bees-${version}";
|
||||
inherit version;
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "Zygo";
|
||||
repo = "bees";
|
||||
rev = "v${version}";
|
||||
inherit sha256;
|
||||
};
|
||||
|
||||
buildInputs = [
|
||||
btrfs-progs # for btrfs/ioctl.h
|
||||
utillinux # for uuid.h
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
pythonPackages.markdown # documentation build
|
||||
];
|
||||
|
||||
preBuild = ''
|
||||
git() { if [[ $1 = describe ]]; then echo ${version}; else command git "$@"; fi; }
|
||||
export -f git
|
||||
'';
|
||||
|
||||
postBuild = ''
|
||||
unset -f git
|
||||
'';
|
||||
|
||||
buildFlags = [
|
||||
"ETC_PREFIX=/var/run/bees/configs"
|
||||
];
|
||||
|
||||
makeFlags = [
|
||||
"SHELL=bash"
|
||||
"PREFIX=$(out)"
|
||||
"ETC_PREFIX=$(out)/etc"
|
||||
"BEES_VERSION=${version}"
|
||||
"SYSTEMD_SYSTEM_UNIT_DIR=$(out)/etc/systemd/system"
|
||||
];
|
||||
|
||||
meta = with stdenv.lib; {
|
||||
homepage = "https://github.com/Zygo/bees";
|
||||
description = "Block-oriented BTRFS deduplication service";
|
||||
license = licenses.gpl3;
|
||||
platforms = platforms.linux;
|
||||
maintainers = with maintainers; [ chaduffy ];
|
||||
longDescription = "Best-Effort Extent-Same: bees finds not just identical files, but also identical extents within files that differ";
|
||||
};
|
||||
};
|
||||
|
||||
in
|
||||
|
||||
runCommand "bees-service-${version}" {
|
||||
inherit bash bees coreutils utillinux;
|
||||
btrfsProgs = btrfs-progs; # needs to be a valid shell variable name
|
||||
} ''
|
||||
mkdir -p -- "$out/bin"
|
||||
substituteAll ${./bees-service-wrapper} "$out"/bin/bees-service-wrapper
|
||||
chmod +x "$out"/bin/bees-service-wrapper
|
||||
ln -s ${bees}/bin/beesd "$out"/bin/beesd
|
||||
''
|
|
@ -21936,6 +21936,8 @@ with pkgs;
|
|||
|
||||
beep = callPackage ../misc/beep { };
|
||||
|
||||
bees = callPackage ../tools/filesystems/bees { };
|
||||
|
||||
blackbird = callPackage ../misc/themes/blackbird { };
|
||||
|
||||
bootil = callPackage ../development/libraries/bootil { };
|
||||
|
|
Loading…
Reference in New Issue