Implement dockerTools.streamLayeredImage

This commit is contained in:
Utku Demir 2020-06-08 21:47:46 +12:00
parent 60f0e62b3c
commit 560201da66
No known key found for this signature in database
GPG Key ID: F3F8629C3E0BF60B
4 changed files with 303 additions and 304 deletions

View File

@ -11,6 +11,7 @@
jq,
jshon,
lib,
makeWrapper,
moreutils,
nix,
pigz,
@ -29,6 +30,7 @@
writeReferencesToFile,
writeScript,
writeText,
writePython3,
}:
# WARNING: this API is unstable and may be subject to backwards-incompatible changes in the future.
@ -204,24 +206,17 @@ rec {
mkdir image
tar -C image -xpf "$fromImage"
# If the image name isn't set, read it from the image repository json.
if [[ -z "$fromImageName" ]]; then
fromImageName=$(jshon -k < image/repositories | head -n 1)
echo "From-image name wasn't set. Read $fromImageName."
if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then
parentID="$(
cat "image/manifest.json" |
jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \
--arg desiredTag "$fromImageName:$fromImageTag"
)"
else
echo "From-image name or tag wasn't set. Reading the first ID."
parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')"
fi
# If the tag isn't set, use the name as an index into the json
# and read the first key found.
if [[ -z "$fromImageTag" ]]; then
fromImageTag=$(jshon -e $fromImageName -k < image/repositories \
| head -n1)
echo "From-image tag wasn't set. Read $fromImageTag."
fi
# Use the name and tag to get the parent ID field.
parentID=$(jshon -e $fromImageName -e $fromImageTag -u \
< image/repositories)
cat ./image/manifest.json | jq -r '.[0].Layers | .[]' > layer-list
else
touch layer-list
@ -305,106 +300,6 @@ rec {
${text}
'';
# Create $maxLayers worth of Docker Layers, one layer per store path
# unless there are more paths than $maxLayers. In that case, create
# $maxLayers-1 for the most popular layers, and smush the remainaing
# store paths in to one final layer.
#
# NOTE: the `closures` parameter is a list of closures to include.
# The TOP LEVEL store paths themselves will never be present in the
# resulting image. At this time (2019-12-16) none of these layers
# are appropriate to include, as they are all created as
# implementation details of dockerTools.
mkManyPureLayers = {
name,
# Files to add to the layer.
closures,
configJson,
# Docker has a 125-layer maximum, we pick 100 to ensure there is
# plenty of room for extension.
# https://github.com/moby/moby/blob/b3e9f7b13b0f0c414fa6253e1f17a86b2cff68b5/layer/layer_store.go#L23-L26
maxLayers ? 100
}:
let
storePathToLayer = substituteAll
{ shell = runtimeShell;
isExecutable = true;
src = ./store-path-to-layer.sh;
};
overallClosure = writeText "closure" (lib.concatStringsSep " " closures);
in
runCommand "${name}-granular-docker-layers" {
inherit maxLayers;
paths = referencesByPopularity overallClosure;
nativeBuildInputs = [ jshon rsync tarsum moreutils ];
enableParallelBuilding = true;
}
''
mkdir layers
# Delete impurities for store path layers, so they don't get
# shared and taint other projects.
cat ${configJson} \
| jshon -d config \
| jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
# WARNING!
# The following code is fiddly w.r.t. ensuring every layer is
# created, and that no paths are missed. If you change the
# following head and tail call lines, double-check that your
# code behaves properly when the number of layers equals:
# maxLayers-1, maxLayers, and maxLayers+1, 0
paths() {
cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])}
}
paths | head -n $((maxLayers - 1)) | cat -n | xargs -r -P$NIX_BUILD_CORES -n2 ${storePathToLayer}
if [ $(paths | wc -l) -ge $maxLayers ]; then
paths | tail -n+$maxLayers | xargs ${storePathToLayer} $maxLayers
fi
echo "Finished building layer '$name'"
mv ./layers $out
'';
# Create a "Customisation" layer which adds symlinks at the root of
# the image to the root paths of the closure. Also add the config
# data like what command to run and the environment to run it in.
mkCustomisationLayer = {
name,
# Files to add to the layer.
contents,
baseJson,
extraCommands,
uid ? 0, gid ? 0,
}:
runCommand "${name}-customisation-layer" {
nativeBuildInputs = [ jshon rsync tarsum ];
inherit extraCommands;
}
''
cp -r ${contents}/ ./layer
if [[ -n $extraCommands ]]; then
chmod ug+w layer
(cd layer; eval "$extraCommands")
fi
# Tar up the layer and throw it into 'layer.tar', while calculating its checksum.
echo "Packing layer..."
mkdir $out
tarhash=$(tar --transform='s|^\./||' -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf - . | tee $out/layer.tar | tarsum)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
'';
# Create a "layer" (set of files).
mkPureLayer = {
# Name of the layer
@ -541,131 +436,14 @@ rec {
'';
};
buildLayeredImage = {
# Image Name
name,
# Image tag, the Nix's output hash will be used if null
tag ? null,
# Files to put on the image (a nix store path or list of paths).
contents ? [],
# Docker config; e.g. what command to run on the container.
config ? {},
# Time of creation of the image. Passing "now" will make the
# created date be the time of building.
created ? "1970-01-01T00:00:01Z",
# Optional bash script to run on the files prior to fixturizing the layer.
extraCommands ? "", uid ? 0, gid ? 0,
# We pick 100 to ensure there is plenty of room for extension. I
# believe the actual maximum is 128.
maxLayers ? 100
}:
assert
(lib.assertMsg (maxLayers > 1)
"the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})");
buildLayeredImage = {name, ...}@args:
let
baseName = baseNameOf name;
contentsEnv = symlinkJoin {
name = "bulk-layers";
paths = if builtins.isList contents
then contents
else [ contents ];
};
configJson = let
pure = writeText "${baseName}-config.json" (builtins.toJSON {
inherit created config;
architecture = buildPackages.go.GOARCH;
os = "linux";
});
impure = runCommand "${baseName}-standard-dynamic-date.json"
{ nativeBuildInputs = [ jq ]; }
''
jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
'';
in if created == "now" then impure else pure;
bulkLayers = mkManyPureLayers {
name = baseName;
closures = [ contentsEnv configJson ];
# One layer will be taken up by the customisationLayer, so
# take up one less.
maxLayers = maxLayers - 1;
inherit configJson;
};
customisationLayer = mkCustomisationLayer {
name = baseName;
contents = contentsEnv;
baseJson = configJson;
inherit uid gid extraCommands;
};
result = runCommand "docker-image-${baseName}.tar.gz" {
nativeBuildInputs = [ jshon pigz coreutils findutils jq ];
# Image name and tag must be lowercase
imageName = lib.toLower name;
baseJson = configJson;
passthru.imageTag =
if tag == null
then lib.head (lib.splitString "-" (lib.last (lib.splitString "/" result)))
else lib.toLower tag;
# Docker can't be made to run darwin binaries
meta.badPlatforms = lib.platforms.darwin;
} ''
${if (tag == null) then ''
outName="$(basename "$out")"
outHash=$(echo "$outName" | cut -d - -f 1)
imageTag=$outHash
'' else ''
imageTag="${tag}"
''}
find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
echo ${customisationLayer} >> layer-list
mkdir image
imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
for layer in $(cat layer-list); do
layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
ln -s "$layer" "./image/$layerID"
manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= . + [\"$layerID/layer.tar\"]")
imageJson=$(echo "$imageJson" | jq ".history |= . + [{\"created\": \"$(jq -r .created ${configJson})\"}]")
imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= . + [\"sha256:$layerChecksum\"]")
done
imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
echo "$imageJson" > "image/$imageJsonChecksum.json"
manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
echo "$manifestJson" > image/manifest.json
jshon -n object \
-n object -s "$layerID" -i "$imageTag" \
-i "$imageName" > image/repositories
echo "Cooking the image..."
# tar exits with an exit code of 1 if files changed while it was
# reading them. It considers a change in the number of hard links
# to be a "change", which can cause this to fail if images are being
# built concurrently and the auto-optimise-store nix option is turned on.
# Since the contents of these files will not change, we can reasonably
# ignore this exit code.
set +e
tar -C image --dereference --hard-dereference --sort=name \
--mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 \
--mode=a-w --xform s:'^./':: --use-compress-program='pigz -nT' \
--warning=no-file-changed -cf $out .
RET=$?
if [ $RET -ne 0 ] && [ $RET -ne 1 ]; then
exit $RET
fi
set -e
echo "Finished."
'';
stream = streamLayeredImage args;
in
result;
runCommand "${name}.tar.gz" {
inherit (stream) imageName;
buildInputs = [ pigz ];
} "${stream} | pigz -nT > $out";
# 1. extract the base image
# 2. create the layer
@ -774,20 +552,22 @@ rec {
configName="$(cat ./image/manifest.json | jq -r '.[0].Config')"
baseEnvs="$(cat "./image/$configName" | jq '.config.Env // []')"
# Extract the parentID from the manifest
if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then
parentID="$(
cat "image/manifest.json" |
jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \
--arg desiredTag "$fromImageName:$fromImageTag"
)"
else
echo "From-image name or tag wasn't set. Reading the first ID."
parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')"
fi
# Otherwise do not import the base image configuration and manifest
chmod a+w image image/*.json
rm -f image/*.json
if [[ -z "$fromImageName" ]]; then
fromImageName=$(jshon -k < image/repositories|head -n1)
fi
if [[ -z "$fromImageTag" ]]; then
fromImageTag=$(jshon -e $fromImageName -k \
< image/repositories|head -n1)
fi
parentID=$(jshon -e $fromImageName -e $fromImageTag -u \
< image/repositories)
for l in image/*/layer.tar; do
ls_tar $l >> baseFiles
done
@ -904,4 +684,110 @@ rec {
})
);
streamLayeredImage = {
# Image Name
name,
# Image tag, the Nix's output hash will be used if null
tag ? null,
# Files to put on the image (a nix store path or list of paths).
contents ? [],
# Docker config; e.g. what command to run on the container.
config ? {},
# Time of creation of the image. Passing "now" will make the
# created date be the time of building.
created ? "1970-01-01T00:00:01Z",
# Optional bash script to run on the files prior to fixturizing the layer.
extraCommands ? "",
# We pick 100 to ensure there is plenty of room for extension. I
# believe the actual maximum is 128.
maxLayers ? 100
}:
assert
(lib.assertMsg (maxLayers > 1)
"the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})");
let
streamScript = writePython3 "stream" {} ./stream_layered_image.py;
baseJson = writeText "${name}-base.json" (builtins.toJSON {
inherit config;
architecture = buildPackages.go.GOARCH;
os = "linux";
});
customisationLayer = runCommand "${name}-customisation-layer" { inherit extraCommands; } ''
cp -r ${contentsEnv}/ $out
if [[ -n $extraCommands ]]; then
chmod u+w $out
(cd $out; eval "$extraCommands")
fi
'';
contentsEnv = symlinkJoin {
name = "${name}-bulk-layers";
paths = if builtins.isList contents
then contents
else [ contents ];
};
# NOTE: the `closures` parameter is a list of closures to include.
# The TOP LEVEL store paths themselves will never be present in the
# resulting image. At this time (2019-12-16) none of these layers
# are appropriate to include, as they are all created as
# implementation details of dockerTools.
closures = [ baseJson contentsEnv ];
overallClosure = writeText "closure" (lib.concatStringsSep " " closures);
conf = runCommand "${name}-conf.json" {
inherit maxLayers created;
imageName = lib.toLower name;
paths = referencesByPopularity overallClosure;
buildInputs = [ jq ];
} ''
paths() {
cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])}
}
${if (tag == null) then ''
outName="$(basename "$out")"
outHash=$(echo "$outName" | cut -d - -f 1)
imageTag=$outHash
'' else ''
imageTag="${tag}"
''}
if [[ "$created" == "now" ]]; then
created="$(TZ=utc date --iso-8601="seconds")"
fi
# Create $maxLayers worth of Docker Layers, one layer per store path
# unless there are more paths than $maxLayers. In that case, create
# $maxLayers-1 for the most popular layers, and smush the remainaing
# store paths in to one final layer.
store_layers="$(
paths |
jq -sR '
rtrimstr("\n") | split("\n")
| (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ]
| map(select(length > 0))
' \
--argjson maxLayers "$(( maxLayers - 1 ))" # one layer will be taken up by the customisation layer
)"
cat ${baseJson} | jq '
. + {
"store_layers": $store_layers,
"customisation_layer", $customisation_layer,
"repo_tag": $repo_tag,
"created": $created
}
' --argjson store_layers "$store_layers" \
--arg customisation_layer ${customisationLayer} \
--arg repo_tag "$imageName:$imageTag" \
--arg created "$created" |
tee $out
'';
result = runCommand "stream-${name}" {
inherit (conf) imageName;
buildInputs = [ makeWrapper ];
} ''
makeWrapper ${streamScript} $out --add-flags ${conf}
'';
in result;
}

View File

@ -1,54 +0,0 @@
#!@shell@
set -eu
layerNumber=$1
shift
layerPath="./layers/$layerNumber"
echo "Creating layer #$layerNumber for $@"
mkdir -p "$layerPath"
# Make sure /nix and /nix/store appear first in the archive.
#
# We create the directories here and use them because
# when there are other things being added to the
# nix store, tar could fail, saying,
# "tar: /nix/store: file changed as we read it"
#
# In addition, we use `__Nix__` instead of `nix` to avoid renaming
# relative symlink destinations like
# /nix/store/...-nix-2.3.4/bin/nix-daemon -> nix
mkdir -p __Nix__/store
# Then we change into the /nix/store in order to
# avoid a similar "file changed as we read it" error
# as above. Namely, if we use the absolute path of
# /nix/store/123-pkg and something new is added to the nix
# store while tar is running, it will detect a change to
# /nix/store and fail. Instead, if we cd into the nix store
# and copy the relative nix store path, tar will ignore
# changes to /nix/store. In order to create the correct
# structure in the tar file, we transform the relative nix
# store path to the absolute store path.
tarhash=$(
basename -a "$@" |
tar --create --preserve-permissions --absolute-names nix \
--directory /nix/store --verbatim-files-from --files-from - \
--hard-dereference --sort=name \
--mtime="@$SOURCE_DATE_EPOCH" \
--owner=0 --group=0 \
--transform 's,^__Nix__$,/nix,' \
--transform 's,^__Nix__/store$,/nix/store,' \
--transform 's,^[^/],/nix/store/\0,rS' |
tee "$layerPath/layer.tar" |
tarsum
)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $layerPath/VERSION

View File

@ -0,0 +1,165 @@
import io
import os
import re
import sys
import json
import hashlib
import tarfile
import itertools
import threading
from collections import namedtuple
# Adds the given store paths to as a tar to the given writable stream.
def archive_paths_to(obj, paths, add_nix, filter=None):
filter = filter if filter else lambda i: i
# gettarinfo makes the paths relative, this makes them
# absolute again
def append_root(ti):
ti.name = "/" + ti.name
return ti
def dir(path):
ti = tarfile.TarInfo(path)
ti.type = tarfile.DIRTYPE
return ti
with tarfile.open(fileobj=obj, mode="w|") as tar:
if add_nix:
tar.addfile(dir("/nix"))
tar.addfile(dir("/nix/store"))
for path in paths:
ti = tar.gettarinfo(os.path.join("/", path))
tar.addfile(filter(append_root(ti)))
for root, dirs, files in os.walk(path, topdown=True):
for name in itertools.chain(dirs, files):
name = os.path.join(root, name)
ti = append_root(tar.gettarinfo(name))
# copy hardlinks as regular files
if ti.islnk():
ti.type = tarfile.REGTYPE
ti = filter(ti)
if ti.isfile():
with open(name, "rb") as f:
tar.addfile(ti, f)
else:
tar.addfile(ti)
# A writable stream which only calculates the final file size and
# sha256sum, while discarding the actual contents.
class ExtractChecksum:
def __init__(self):
self._digest = hashlib.sha256()
self._size = 0
def write(self, data):
self._digest.update(data)
self._size += len(data)
def extract(self):
return (self._digest.hexdigest(), self._size)
# Some metadata for a layer
LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
# Given a list of store paths 'paths', creates a layer add append it
# to tarfile 'tar'. Returns some a 'LayerInfo' for the layer.
def add_layer_dir(tar, paths, add_nix=True, filter=None):
assert all(i.startswith("/nix/store/") for i in paths)
extract_checksum = ExtractChecksum()
archive_paths_to(extract_checksum, paths, add_nix=add_nix, filter=filter)
(checksum, size) = extract_checksum.extract()
path = f"{checksum}/layer.tar"
ti = tarfile.TarInfo(path)
ti.size = size
read_fd, write_fd = os.pipe()
with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
def producer():
archive_paths_to(write, paths, add_nix=add_nix, filter=filter)
write.close()
threading.Thread(target=producer).start()
tar.addfile(ti, read)
return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
def add_customisation_layer(tar, path):
def filter(ti):
ti.name = re.sub("^/nix/store/[^/]*", "", ti.name)
return ti
return add_layer_dir(tar, [path], add_nix=False, filter=filter)
# Adds a file to the tarball with given path and contents.
def add_bytes(tar, path, content):
assert type(content) is bytes
ti = tarfile.TarInfo(path)
ti.size = len(content)
tar.addfile(ti, io.BytesIO(content))
# Main
with open(sys.argv[1], "r") as f:
conf = json.load(f)
with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
layers = []
for num, store_layer in enumerate(conf["store_layers"]):
print(
"Creating layer", num,
"from paths:", store_layer,
file=sys.stderr)
info = add_layer_dir(tar, store_layer)
layers.append(info)
print("Creating the customisation layer...", file=sys.stderr)
layers.append(add_customisation_layer(tar, conf["customisation_layer"]))
print("Adding manifests...", file=sys.stderr)
image_json = {
"created": conf["created"],
"architecture": conf["architecture"],
"os": "linux",
"config": conf["config"],
"rootfs": {
"diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
"type": "layers",
},
"history": [
{
"created": conf["created"],
"comment": f"store paths: {layer.paths}"
}
for layer in layers
],
}
image_json = json.dumps(image_json, indent=4).encode("utf-8")
image_json_checksum = hashlib.sha256(image_json).hexdigest()
image_json_path = f"{image_json_checksum}.json"
add_bytes(tar, image_json_path, image_json)
manifest_json = [
{
"Config": image_json_path,
"RepoTags": [conf["repo_tag"]],
"Layers": [layer.path for layer in layers],
}
]
manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
add_bytes(tar, "manifest.json", manifest_json)
print("Done.", file=sys.stderr)

View File

@ -213,7 +213,9 @@ in
grsync = callPackage ../applications/misc/grsync { };
dockerTools = callPackage ../build-support/docker { };
dockerTools = callPackage ../build-support/docker {
writePython3 = writers.writePython3;
};
snapTools = callPackage ../build-support/snap { };