From 560201da661a4c916d0732e3bab8a77084485659 Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Mon, 8 Jun 2020 21:47:46 +1200 Subject: [PATCH 01/13] Implement dockerTools.streamLayeredImage --- pkgs/build-support/docker/default.nix | 384 ++++++------------ .../docker/store-path-to-layer.sh | 54 --- .../docker/stream_layered_image.py | 165 ++++++++ pkgs/top-level/all-packages.nix | 4 +- 4 files changed, 303 insertions(+), 304 deletions(-) delete mode 100755 pkgs/build-support/docker/store-path-to-layer.sh create mode 100644 pkgs/build-support/docker/stream_layered_image.py diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 83f4a9e0c01..d0ec90264ee 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -11,6 +11,7 @@ jq, jshon, lib, + makeWrapper, moreutils, nix, pigz, @@ -29,6 +30,7 @@ writeReferencesToFile, writeScript, writeText, + writePython3, }: # WARNING: this API is unstable and may be subject to backwards-incompatible changes in the future. @@ -204,24 +206,17 @@ rec { mkdir image tar -C image -xpf "$fromImage" - # If the image name isn't set, read it from the image repository json. - if [[ -z "$fromImageName" ]]; then - fromImageName=$(jshon -k < image/repositories | head -n 1) - echo "From-image name wasn't set. Read $fromImageName." + if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then + parentID="$( + cat "image/manifest.json" | + jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \ + --arg desiredTag "$fromImageName:$fromImageTag" + )" + else + echo "From-image name or tag wasn't set. Reading the first ID." + parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')" fi - # If the tag isn't set, use the name as an index into the json - # and read the first key found. - if [[ -z "$fromImageTag" ]]; then - fromImageTag=$(jshon -e $fromImageName -k < image/repositories \ - | head -n1) - echo "From-image tag wasn't set. Read $fromImageTag." - fi - - # Use the name and tag to get the parent ID field. - parentID=$(jshon -e $fromImageName -e $fromImageTag -u \ - < image/repositories) - cat ./image/manifest.json | jq -r '.[0].Layers | .[]' > layer-list else touch layer-list @@ -305,106 +300,6 @@ rec { ${text} ''; - # Create $maxLayers worth of Docker Layers, one layer per store path - # unless there are more paths than $maxLayers. In that case, create - # $maxLayers-1 for the most popular layers, and smush the remainaing - # store paths in to one final layer. - # - # NOTE: the `closures` parameter is a list of closures to include. - # The TOP LEVEL store paths themselves will never be present in the - # resulting image. At this time (2019-12-16) none of these layers - # are appropriate to include, as they are all created as - # implementation details of dockerTools. - mkManyPureLayers = { - name, - # Files to add to the layer. - closures, - configJson, - # Docker has a 125-layer maximum, we pick 100 to ensure there is - # plenty of room for extension. - # https://github.com/moby/moby/blob/b3e9f7b13b0f0c414fa6253e1f17a86b2cff68b5/layer/layer_store.go#L23-L26 - maxLayers ? 
100 - }: - let - storePathToLayer = substituteAll - { shell = runtimeShell; - isExecutable = true; - src = ./store-path-to-layer.sh; - }; - - overallClosure = writeText "closure" (lib.concatStringsSep " " closures); - in - runCommand "${name}-granular-docker-layers" { - inherit maxLayers; - paths = referencesByPopularity overallClosure; - nativeBuildInputs = [ jshon rsync tarsum moreutils ]; - enableParallelBuilding = true; - } - '' - mkdir layers - - # Delete impurities for store path layers, so they don't get - # shared and taint other projects. - cat ${configJson} \ - | jshon -d config \ - | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json - - # WARNING! - # The following code is fiddly w.r.t. ensuring every layer is - # created, and that no paths are missed. If you change the - # following head and tail call lines, double-check that your - # code behaves properly when the number of layers equals: - # maxLayers-1, maxLayers, and maxLayers+1, 0 - paths() { - cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])} - } - - paths | head -n $((maxLayers - 1)) | cat -n | xargs -r -P$NIX_BUILD_CORES -n2 ${storePathToLayer} - if [ $(paths | wc -l) -ge $maxLayers ]; then - paths | tail -n+$maxLayers | xargs ${storePathToLayer} $maxLayers - fi - - echo "Finished building layer '$name'" - - mv ./layers $out - ''; - - # Create a "Customisation" layer which adds symlinks at the root of - # the image to the root paths of the closure. Also add the config - # data like what command to run and the environment to run it in. - mkCustomisationLayer = { - name, - # Files to add to the layer. - contents, - baseJson, - extraCommands, - uid ? 0, gid ? 0, - }: - runCommand "${name}-customisation-layer" { - nativeBuildInputs = [ jshon rsync tarsum ]; - inherit extraCommands; - } - '' - cp -r ${contents}/ ./layer - - if [[ -n $extraCommands ]]; then - chmod ug+w layer - (cd layer; eval "$extraCommands") - fi - - # Tar up the layer and throw it into 'layer.tar', while calculating its checksum. - echo "Packing layer..." - mkdir $out - tarhash=$(tar --transform='s|^\./||' -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf - . | tee $out/layer.tar | tarsum) - - # Add a 'checksum' field to the JSON, with the value set to the - # checksum of the tarball. - cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json - - # Indicate to docker that we're using schema version 1.0. - echo -n "1.0" > $out/VERSION - ''; - # Create a "layer" (set of files). mkPureLayer = { # Name of the layer @@ -541,131 +436,14 @@ rec { ''; }; - buildLayeredImage = { - # Image Name - name, - # Image tag, the Nix's output hash will be used if null - tag ? null, - # Files to put on the image (a nix store path or list of paths). - contents ? [], - # Docker config; e.g. what command to run on the container. - config ? {}, - # Time of creation of the image. Passing "now" will make the - # created date be the time of building. - created ? "1970-01-01T00:00:01Z", - # Optional bash script to run on the files prior to fixturizing the layer. - extraCommands ? "", uid ? 0, gid ? 0, - # We pick 100 to ensure there is plenty of room for extension. I - # believe the actual maximum is 128. - maxLayers ? 
100 - }: - assert - (lib.assertMsg (maxLayers > 1) - "the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})"); + buildLayeredImage = {name, ...}@args: let - baseName = baseNameOf name; - contentsEnv = symlinkJoin { - name = "bulk-layers"; - paths = if builtins.isList contents - then contents - else [ contents ]; - }; - - configJson = let - pure = writeText "${baseName}-config.json" (builtins.toJSON { - inherit created config; - architecture = buildPackages.go.GOARCH; - os = "linux"; - }); - impure = runCommand "${baseName}-standard-dynamic-date.json" - { nativeBuildInputs = [ jq ]; } - '' - jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out - ''; - in if created == "now" then impure else pure; - - bulkLayers = mkManyPureLayers { - name = baseName; - closures = [ contentsEnv configJson ]; - # One layer will be taken up by the customisationLayer, so - # take up one less. - maxLayers = maxLayers - 1; - inherit configJson; - }; - customisationLayer = mkCustomisationLayer { - name = baseName; - contents = contentsEnv; - baseJson = configJson; - inherit uid gid extraCommands; - }; - result = runCommand "docker-image-${baseName}.tar.gz" { - nativeBuildInputs = [ jshon pigz coreutils findutils jq ]; - # Image name and tag must be lowercase - imageName = lib.toLower name; - baseJson = configJson; - passthru.imageTag = - if tag == null - then lib.head (lib.splitString "-" (lib.last (lib.splitString "/" result))) - else lib.toLower tag; - # Docker can't be made to run darwin binaries - meta.badPlatforms = lib.platforms.darwin; - } '' - ${if (tag == null) then '' - outName="$(basename "$out")" - outHash=$(echo "$outName" | cut -d - -f 1) - - imageTag=$outHash - '' else '' - imageTag="${tag}" - ''} - - find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list - echo ${customisationLayer} >> layer-list - - mkdir image - imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}") - manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]") - for layer in $(cat layer-list); do - layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1) - layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1) - ln -s "$layer" "./image/$layerID" - - manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= . + [\"$layerID/layer.tar\"]") - imageJson=$(echo "$imageJson" | jq ".history |= . + [{\"created\": \"$(jq -r .created ${configJson})\"}]") - imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= . + [\"sha256:$layerChecksum\"]") - done - imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1) - echo "$imageJson" > "image/$imageJsonChecksum.json" - manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"") - echo "$manifestJson" > image/manifest.json - - jshon -n object \ - -n object -s "$layerID" -i "$imageTag" \ - -i "$imageName" > image/repositories - - echo "Cooking the image..." - # tar exits with an exit code of 1 if files changed while it was - # reading them. It considers a change in the number of hard links - # to be a "change", which can cause this to fail if images are being - # built concurrently and the auto-optimise-store nix option is turned on. - # Since the contents of these files will not change, we can reasonably - # ignore this exit code. 
- set +e - tar -C image --dereference --hard-dereference --sort=name \ - --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 \ - --mode=a-w --xform s:'^./':: --use-compress-program='pigz -nT' \ - --warning=no-file-changed -cf $out . - RET=$? - if [ $RET -ne 0 ] && [ $RET -ne 1 ]; then - exit $RET - fi - set -e - - echo "Finished." - ''; - + stream = streamLayeredImage args; in - result; + runCommand "${name}.tar.gz" { + inherit (stream) imageName; + buildInputs = [ pigz ]; + } "${stream} | pigz -nT > $out"; # 1. extract the base image # 2. create the layer @@ -774,20 +552,22 @@ rec { configName="$(cat ./image/manifest.json | jq -r '.[0].Config')" baseEnvs="$(cat "./image/$configName" | jq '.config.Env // []')" + # Extract the parentID from the manifest + if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then + parentID="$( + cat "image/manifest.json" | + jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \ + --arg desiredTag "$fromImageName:$fromImageTag" + )" + else + echo "From-image name or tag wasn't set. Reading the first ID." + parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')" + fi + # Otherwise do not import the base image configuration and manifest chmod a+w image image/*.json rm -f image/*.json - if [[ -z "$fromImageName" ]]; then - fromImageName=$(jshon -k < image/repositories|head -n1) - fi - if [[ -z "$fromImageTag" ]]; then - fromImageTag=$(jshon -e $fromImageName -k \ - < image/repositories|head -n1) - fi - parentID=$(jshon -e $fromImageName -e $fromImageTag -u \ - < image/repositories) - for l in image/*/layer.tar; do ls_tar $l >> baseFiles done @@ -904,4 +684,110 @@ rec { }) ); + streamLayeredImage = { + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Docker config; e.g. what command to run on the container. + config ? {}, + # Time of creation of the image. Passing "now" will make the + # created date be the time of building. + created ? "1970-01-01T00:00:01Z", + # Optional bash script to run on the files prior to fixturizing the layer. + extraCommands ? "", + # We pick 100 to ensure there is plenty of room for extension. I + # believe the actual maximum is 128. + maxLayers ? 100 + }: + assert + (lib.assertMsg (maxLayers > 1) + "the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})"); + let + streamScript = writePython3 "stream" {} ./stream_layered_image.py; + baseJson = writeText "${name}-base.json" (builtins.toJSON { + inherit config; + architecture = buildPackages.go.GOARCH; + os = "linux"; + }); + customisationLayer = runCommand "${name}-customisation-layer" { inherit extraCommands; } '' + cp -r ${contentsEnv}/ $out + + if [[ -n $extraCommands ]]; then + chmod u+w $out + (cd $out; eval "$extraCommands") + fi + ''; + contentsEnv = symlinkJoin { + name = "${name}-bulk-layers"; + paths = if builtins.isList contents + then contents + else [ contents ]; + }; + + # NOTE: the `closures` parameter is a list of closures to include. + # The TOP LEVEL store paths themselves will never be present in the + # resulting image. At this time (2019-12-16) none of these layers + # are appropriate to include, as they are all created as + # implementation details of dockerTools. 
+ closures = [ baseJson contentsEnv ]; + overallClosure = writeText "closure" (lib.concatStringsSep " " closures); + conf = runCommand "${name}-conf.json" { + inherit maxLayers created; + imageName = lib.toLower name; + paths = referencesByPopularity overallClosure; + buildInputs = [ jq ]; + } '' + paths() { + cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])} + } + ${if (tag == null) then '' + outName="$(basename "$out")" + outHash=$(echo "$outName" | cut -d - -f 1) + + imageTag=$outHash + '' else '' + imageTag="${tag}" + ''} + + if [[ "$created" == "now" ]]; then + created="$(TZ=utc date --iso-8601="seconds")" + fi + + # Create $maxLayers worth of Docker Layers, one layer per store path + # unless there are more paths than $maxLayers. In that case, create + # $maxLayers-1 for the most popular layers, and smush the remainaing + # store paths in to one final layer. + store_layers="$( + paths | + jq -sR ' + rtrimstr("\n") | split("\n") + | (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ] + | map(select(length > 0)) + ' \ + --argjson maxLayers "$(( maxLayers - 1 ))" # one layer will be taken up by the customisation layer + )" + + cat ${baseJson} | jq ' + . + { + "store_layers": $store_layers, + "customisation_layer", $customisation_layer, + "repo_tag": $repo_tag, + "created": $created + } + ' --argjson store_layers "$store_layers" \ + --arg customisation_layer ${customisationLayer} \ + --arg repo_tag "$imageName:$imageTag" \ + --arg created "$created" | + tee $out + ''; + result = runCommand "stream-${name}" { + inherit (conf) imageName; + buildInputs = [ makeWrapper ]; + } '' + makeWrapper ${streamScript} $out --add-flags ${conf} + ''; + in result; } diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh deleted file mode 100755 index 3a1fcd0c27a..00000000000 --- a/pkgs/build-support/docker/store-path-to-layer.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!@shell@ - -set -eu - -layerNumber=$1 -shift - -layerPath="./layers/$layerNumber" -echo "Creating layer #$layerNumber for $@" - -mkdir -p "$layerPath" - -# Make sure /nix and /nix/store appear first in the archive. -# -# We create the directories here and use them because -# when there are other things being added to the -# nix store, tar could fail, saying, -# "tar: /nix/store: file changed as we read it" -# -# In addition, we use `__Nix__` instead of `nix` to avoid renaming -# relative symlink destinations like -# /nix/store/...-nix-2.3.4/bin/nix-daemon -> nix -mkdir -p __Nix__/store - -# Then we change into the /nix/store in order to -# avoid a similar "file changed as we read it" error -# as above. Namely, if we use the absolute path of -# /nix/store/123-pkg and something new is added to the nix -# store while tar is running, it will detect a change to -# /nix/store and fail. Instead, if we cd into the nix store -# and copy the relative nix store path, tar will ignore -# changes to /nix/store. In order to create the correct -# structure in the tar file, we transform the relative nix -# store path to the absolute store path. 
-tarhash=$( - basename -a "$@" | - tar --create --preserve-permissions --absolute-names nix \ - --directory /nix/store --verbatim-files-from --files-from - \ - --hard-dereference --sort=name \ - --mtime="@$SOURCE_DATE_EPOCH" \ - --owner=0 --group=0 \ - --transform 's,^__Nix__$,/nix,' \ - --transform 's,^__Nix__/store$,/nix/store,' \ - --transform 's,^[^/],/nix/store/\0,rS' | - tee "$layerPath/layer.tar" | - tarsum -) - -# Add a 'checksum' field to the JSON, with the value set to the -# checksum of the tarball. -cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json - -# Indicate to docker that we're using schema version 1.0. -echo -n "1.0" > $layerPath/VERSION diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py new file mode 100644 index 00000000000..a0f2cf726e3 --- /dev/null +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -0,0 +1,165 @@ +import io +import os +import re +import sys +import json +import hashlib +import tarfile +import itertools +import threading +from collections import namedtuple + + +# Adds the given store paths to as a tar to the given writable stream. +def archive_paths_to(obj, paths, add_nix, filter=None): + filter = filter if filter else lambda i: i + + # gettarinfo makes the paths relative, this makes them + # absolute again + def append_root(ti): + ti.name = "/" + ti.name + return ti + + def dir(path): + ti = tarfile.TarInfo(path) + ti.type = tarfile.DIRTYPE + return ti + + with tarfile.open(fileobj=obj, mode="w|") as tar: + if add_nix: + tar.addfile(dir("/nix")) + tar.addfile(dir("/nix/store")) + + for path in paths: + ti = tar.gettarinfo(os.path.join("/", path)) + tar.addfile(filter(append_root(ti))) + + for root, dirs, files in os.walk(path, topdown=True): + for name in itertools.chain(dirs, files): + name = os.path.join(root, name) + ti = append_root(tar.gettarinfo(name)) + + # copy hardlinks as regular files + if ti.islnk(): + ti.type = tarfile.REGTYPE + + ti = filter(ti) + if ti.isfile(): + with open(name, "rb") as f: + tar.addfile(ti, f) + else: + tar.addfile(ti) + + +# A writable stream which only calculates the final file size and +# sha256sum, while discarding the actual contents. +class ExtractChecksum: + def __init__(self): + self._digest = hashlib.sha256() + self._size = 0 + + def write(self, data): + self._digest.update(data) + self._size += len(data) + + def extract(self): + return (self._digest.hexdigest(), self._size) + + +# Some metadata for a layer +LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"]) + + +# Given a list of store paths 'paths', creates a layer add append it +# to tarfile 'tar'. Returns some a 'LayerInfo' for the layer. 
+def add_layer_dir(tar, paths, add_nix=True, filter=None): + assert all(i.startswith("/nix/store/") for i in paths) + + extract_checksum = ExtractChecksum() + archive_paths_to(extract_checksum, paths, add_nix=add_nix, filter=filter) + (checksum, size) = extract_checksum.extract() + + path = f"{checksum}/layer.tar" + ti = tarfile.TarInfo(path) + ti.size = size + + read_fd, write_fd = os.pipe() + with open(read_fd, "rb") as read, open(write_fd, "wb") as write: + def producer(): + archive_paths_to(write, paths, add_nix=add_nix, filter=filter) + write.close() + threading.Thread(target=producer).start() + tar.addfile(ti, read) + + return LayerInfo(size=size, checksum=checksum, path=path, paths=paths) + + +def add_customisation_layer(tar, path): + def filter(ti): + ti.name = re.sub("^/nix/store/[^/]*", "", ti.name) + return ti + return add_layer_dir(tar, [path], add_nix=False, filter=filter) + + +# Adds a file to the tarball with given path and contents. +def add_bytes(tar, path, content): + assert type(content) is bytes + + ti = tarfile.TarInfo(path) + ti.size = len(content) + tar.addfile(ti, io.BytesIO(content)) + + +# Main + +with open(sys.argv[1], "r") as f: + conf = json.load(f) + +with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: + layers = [] + for num, store_layer in enumerate(conf["store_layers"]): + print( + "Creating layer", num, + "from paths:", store_layer, + file=sys.stderr) + info = add_layer_dir(tar, store_layer) + layers.append(info) + + print("Creating the customisation layer...", file=sys.stderr) + layers.append(add_customisation_layer(tar, conf["customisation_layer"])) + + print("Adding manifests...", file=sys.stderr) + image_json = { + "created": conf["created"], + "architecture": conf["architecture"], + "os": "linux", + "config": conf["config"], + "rootfs": { + "diff_ids": [f"sha256:{layer.checksum}" for layer in layers], + "type": "layers", + }, + "history": [ + { + "created": conf["created"], + "comment": f"store paths: {layer.paths}" + } + for layer in layers + ], + } + + image_json = json.dumps(image_json, indent=4).encode("utf-8") + image_json_checksum = hashlib.sha256(image_json).hexdigest() + image_json_path = f"{image_json_checksum}.json" + add_bytes(tar, image_json_path, image_json) + + manifest_json = [ + { + "Config": image_json_path, + "RepoTags": [conf["repo_tag"]], + "Layers": [layer.path for layer in layers], + } + ] + manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8") + add_bytes(tar, "manifest.json", manifest_json) + + print("Done.", file=sys.stderr) diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index e31c42261e7..fa68b749363 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -213,7 +213,9 @@ in grsync = callPackage ../applications/misc/grsync { }; - dockerTools = callPackage ../build-support/docker { }; + dockerTools = callPackage ../build-support/docker { + writePython3 = writers.writePython3; + }; snapTools = callPackage ../build-support/snap { }; From 4e0109f87362ee31f71ee5805806a3915afd3fbd Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Thu, 11 Jun 2020 12:44:04 +1200 Subject: [PATCH 02/13] Set mtime to get more deterministic builds --- pkgs/build-support/docker/default.nix | 5 +- .../docker/stream_layered_image.py | 62 +++++++++++++++---- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index d0ec90264ee..06f2d923569 100644 --- 
a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -752,8 +752,9 @@ rec { imageTag="${tag}" ''} - if [[ "$created" == "now" ]]; then - created="$(TZ=utc date --iso-8601="seconds")" + # convert "created" to iso format + if [[ "$created" != "now" ]]; then + created="$(date -Iseconds -d "$created")" fi # Create $maxLayers worth of Docker Layers, one layer per store path diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py index a0f2cf726e3..fd8c2428d91 100644 --- a/pkgs/build-support/docker/stream_layered_image.py +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -7,11 +7,12 @@ import hashlib import tarfile import itertools import threading +from datetime import datetime from collections import namedtuple # Adds the given store paths to as a tar to the given writable stream. -def archive_paths_to(obj, paths, add_nix, filter=None): +def archive_paths_to(obj, paths, created, add_nix, filter=None): filter = filter if filter else lambda i: i # gettarinfo makes the paths relative, this makes them @@ -20,6 +21,10 @@ def archive_paths_to(obj, paths, add_nix, filter=None): ti.name = "/" + ti.name return ti + def apply_filters(ti): + ti.mtime = int(created.timestamp()) + return filter(ti) + def dir(path): ti = tarfile.TarInfo(path) ti.type = tarfile.DIRTYPE @@ -27,12 +32,12 @@ def archive_paths_to(obj, paths, add_nix, filter=None): with tarfile.open(fileobj=obj, mode="w|") as tar: if add_nix: - tar.addfile(dir("/nix")) - tar.addfile(dir("/nix/store")) + tar.addfile(apply_filters(dir("/nix"))) + tar.addfile(apply_filters(dir("/nix/store"))) for path in paths: ti = tar.gettarinfo(os.path.join("/", path)) - tar.addfile(filter(append_root(ti))) + tar.addfile(apply_filters(append_root(ti))) for root, dirs, files in os.walk(path, topdown=True): for name in itertools.chain(dirs, files): @@ -43,7 +48,7 @@ def archive_paths_to(obj, paths, add_nix, filter=None): if ti.islnk(): ti.type = tarfile.REGTYPE - ti = filter(ti) + ti = apply_filters(ti) if ti.isfile(): with open(name, "rb") as f: tar.addfile(ti, f) @@ -72,11 +77,17 @@ LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"]) # Given a list of store paths 'paths', creates a layer add append it # to tarfile 'tar'. Returns some a 'LayerInfo' for the layer. 
-def add_layer_dir(tar, paths, add_nix=True, filter=None): +def add_layer_dir(tar, paths, created, add_nix=True, filter=None): assert all(i.startswith("/nix/store/") for i in paths) extract_checksum = ExtractChecksum() - archive_paths_to(extract_checksum, paths, add_nix=add_nix, filter=filter) + archive_paths_to( + extract_checksum, + paths, + created=created, + add_nix=add_nix, + filter=filter + ) (checksum, size) = extract_checksum.extract() path = f"{checksum}/layer.tar" @@ -86,7 +97,13 @@ def add_layer_dir(tar, paths, add_nix=True, filter=None): read_fd, write_fd = os.pipe() with open(read_fd, "rb") as read, open(write_fd, "wb") as write: def producer(): - archive_paths_to(write, paths, add_nix=add_nix, filter=filter) + archive_paths_to( + write, + paths, + created=created, + add_nix=add_nix, + filter=filter + ) write.close() threading.Thread(target=producer).start() tar.addfile(ti, read) @@ -94,11 +111,17 @@ def add_layer_dir(tar, paths, add_nix=True, filter=None): return LayerInfo(size=size, checksum=checksum, path=path, paths=paths) -def add_customisation_layer(tar, path): +def add_customisation_layer(tar, path, created): def filter(ti): ti.name = re.sub("^/nix/store/[^/]*", "", ti.name) return ti - return add_layer_dir(tar, [path], add_nix=False, filter=filter) + return add_layer_dir( + tar, + [path], + created=created, + add_nix=False, + filter=filter + ) # Adds a file to the tarball with given path and contents. @@ -115,6 +138,12 @@ def add_bytes(tar, path, content): with open(sys.argv[1], "r") as f: conf = json.load(f) +created = ( + datetime.now(tz=datetime.timezone.utc) + if conf["created"] == "now" + else datetime.fromisoformat(conf["created"]) +) + with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: layers = [] for num, store_layer in enumerate(conf["store_layers"]): @@ -122,15 +151,22 @@ with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: "Creating layer", num, "from paths:", store_layer, file=sys.stderr) - info = add_layer_dir(tar, store_layer) + info = add_layer_dir(tar, store_layer, created=created) layers.append(info) print("Creating the customisation layer...", file=sys.stderr) - layers.append(add_customisation_layer(tar, conf["customisation_layer"])) + layers.append( + add_customisation_layer( + tar, + conf["customisation_layer"], + created=created + ) + ) print("Adding manifests...", file=sys.stderr) + image_json = { - "created": conf["created"], + "created": datetime.isoformat(created), "architecture": conf["architecture"], "os": "linux", "config": conf["config"], From a5a611cacbc11d267d394f2a7bde06c098ef65af Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Thu, 11 Jun 2020 12:51:47 +1200 Subject: [PATCH 03/13] Set mtime in even more places --- .../docker/stream_layered_image.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py index fd8c2428d91..f502e855e95 100644 --- a/pkgs/build-support/docker/stream_layered_image.py +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -12,7 +12,7 @@ from collections import namedtuple # Adds the given store paths to as a tar to the given writable stream. 
-def archive_paths_to(obj, paths, created, add_nix, filter=None): +def archive_paths_to(obj, paths, mtime, add_nix, filter=None): filter = filter if filter else lambda i: i # gettarinfo makes the paths relative, this makes them @@ -22,7 +22,7 @@ def archive_paths_to(obj, paths, created, add_nix, filter=None): return ti def apply_filters(ti): - ti.mtime = int(created.timestamp()) + ti.mtime = mtime return filter(ti) def dir(path): @@ -77,14 +77,14 @@ LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"]) # Given a list of store paths 'paths', creates a layer add append it # to tarfile 'tar'. Returns some a 'LayerInfo' for the layer. -def add_layer_dir(tar, paths, created, add_nix=True, filter=None): +def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): assert all(i.startswith("/nix/store/") for i in paths) extract_checksum = ExtractChecksum() archive_paths_to( extract_checksum, paths, - created=created, + mtime=mtime, add_nix=add_nix, filter=filter ) @@ -93,6 +93,7 @@ def add_layer_dir(tar, paths, created, add_nix=True, filter=None): path = f"{checksum}/layer.tar" ti = tarfile.TarInfo(path) ti.size = size + ti.mtime = mtime read_fd, write_fd = os.pipe() with open(read_fd, "rb") as read, open(write_fd, "wb") as write: @@ -100,7 +101,7 @@ def add_layer_dir(tar, paths, created, add_nix=True, filter=None): archive_paths_to( write, paths, - created=created, + mtime=mtime, add_nix=add_nix, filter=filter ) @@ -111,25 +112,26 @@ def add_layer_dir(tar, paths, created, add_nix=True, filter=None): return LayerInfo(size=size, checksum=checksum, path=path, paths=paths) -def add_customisation_layer(tar, path, created): +def add_customisation_layer(tar, path, mtime): def filter(ti): ti.name = re.sub("^/nix/store/[^/]*", "", ti.name) return ti return add_layer_dir( tar, [path], - created=created, + mtime=mtime, add_nix=False, filter=filter ) # Adds a file to the tarball with given path and contents. 
-def add_bytes(tar, path, content):
+def add_bytes(tar, path, content, mtime):
     assert type(content) is bytes
 
     ti = tarfile.TarInfo(path)
     ti.size = len(content)
+    ti.mtime = mtime
     tar.addfile(ti, io.BytesIO(content))
 
 
@@ -143,6 +145,7 @@ with open(sys.argv[1], "r") as f:
 created = (
     datetime.now(tz=datetime.timezone.utc)
     if conf["created"] == "now"
     else datetime.fromisoformat(conf["created"])
 )
+mtime = int(created.timestamp())
 
 with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
     layers = []
@@ -151,7 +154,7 @@ with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
             "Creating layer", num,
             "from paths:", store_layer,
             file=sys.stderr)
-        info = add_layer_dir(tar, store_layer, created=created)
+        info = add_layer_dir(tar, store_layer, mtime=mtime)
         layers.append(info)
 
     print("Creating the customisation layer...", file=sys.stderr)
@@ -159,7 +162,7 @@ with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
         add_customisation_layer(
             tar,
             conf["customisation_layer"],
-            created=created
+            mtime=mtime
         )
     )
 
@@ -186,7 +189,7 @@ with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
     image_json = json.dumps(image_json, indent=4).encode("utf-8")
     image_json_checksum = hashlib.sha256(image_json).hexdigest()
     image_json_path = f"{image_json_checksum}.json"
-    add_bytes(tar, image_json_path, image_json)
+    add_bytes(tar, image_json_path, image_json, mtime=mtime)
 
     manifest_json = [
         {
@@ -196,6 +199,6 @@ with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
     ]
     manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
-    add_bytes(tar, "manifest.json", manifest_json)
+    add_bytes(tar, "manifest.json", manifest_json, mtime=mtime)
 
     print("Done.", file=sys.stderr)

From 4ab7baf6f61f2c89bbd4f3825546b48adcd031c4 Mon Sep 17 00:00:00 2001
From: Utku Demir
Date: Thu, 18 Jun 2020 14:29:59 +1200
Subject: [PATCH 04/13] stream_layered_image.py: comments

---
 .../docker/stream_layered_image.py            | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
index f502e855e95..6687f49e1bc 100644
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -1,3 +1,36 @@
+"""
+This script generates a Docker image from a set of store paths. Uses
+Docker Image Specification v1.2 as reference [1].
+
+It expects a JSON file with the following properties and writes the
+image as an uncompressed tarball to stdout:
+
+* "architecture", "config", "os", "created", "repo_tag" correspond to
+  the fields with the same name on the image spec [2].
+* "created" can be "now".
+* "created" is also used as mtime for files added to the image.
+* "store_layers" is a list of layers in ascending order, where each
+  layer is the list of store paths to include in that layer.
+
+The main challenge for this script is to create the final image in a
+streaming fashion, without dumping any intermediate data to disk,
+for performance.
+
+A docker image has each layer's contents archived as separate tarballs,
+and they later all get enveloped into a single big tarball in a
+content-addressed fashion. However, because of how the "tar" format
+works, we have to know the name (which includes the checksum in our
+case) and the size of the tarball before we can start adding it to the
+outer tarball. We achieve that by creating the layer tarballs twice;
+on the first iteration we calculate the file size and the checksum,
+and on the second one we actually stream the contents. The
+'add_layer_dir' function does all this.
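+
+As an illustration, the JSON configuration this script consumes looks
+roughly like the following (a sketch only; the store paths, tag and
+command are made-up placeholders):
+
+  {
+    "architecture": "amd64",
+    "os": "linux",
+    "config": {"Cmd": ["/bin/hello"]},
+    "created": "1970-01-01T00:00:01Z",
+    "repo_tag": "hello:latest",
+    "store_layers": [
+      ["/nix/store/...-glibc"],
+      ["/nix/store/...-hello"]
+    ],
+    "customisation_layer": "/nix/store/...-customisation-layer"
+  }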
+ +[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md +[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions +""" # noqa: E501 + + import io import os import re @@ -31,6 +64,9 @@ def archive_paths_to(obj, paths, mtime, add_nix, filter=None): return ti with tarfile.open(fileobj=obj, mode="w|") as tar: + # To be consistent with the docker utilities, we need to have + # these directories first when building layer tarballs. But + # we don't need them on the customisation layer. if add_nix: tar.addfile(apply_filters(dir("/nix"))) tar.addfile(apply_filters(dir("/nix/store"))) @@ -80,6 +116,7 @@ LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"]) def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): assert all(i.startswith("/nix/store/") for i in paths) + # First, calculate the tarball checksum and the size. extract_checksum = ExtractChecksum() archive_paths_to( extract_checksum, @@ -95,6 +132,7 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): ti.size = size ti.mtime = mtime + # Then actually stream the contents to the outer tarball. read_fd, write_fd = os.pipe() with open(read_fd, "rb") as read, open(write_fd, "wb") as write: def producer(): @@ -106,12 +144,20 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): filter=filter ) write.close() + + # Closing the write end of the fifo also closes the read end, + # so we don't need to wait until this thread is finished. + # + # Any exception from the thread will get printed by the default + # exception handler, and the 'addfile' call will fail since it + # won't be able to read required amount of bytes. threading.Thread(target=producer).start() tar.addfile(ti, read) return LayerInfo(size=size, checksum=checksum, path=path, paths=paths) +# Adds the contents of the store path to the root as a new layer. def add_customisation_layer(tar, path, mtime): def filter(ti): ti.name = re.sub("^/nix/store/[^/]*", "", ti.name) From 307804d97d1b2b81dcb792cee62afdad435b28a8 Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Thu, 18 Jun 2020 14:34:27 +1200 Subject: [PATCH 05/13] dockerTools.streamLayeredImage: comments --- pkgs/build-support/docker/default.nix | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 06f2d923569..11d5c2b3642 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -729,7 +729,7 @@ rec { # NOTE: the `closures` parameter is a list of closures to include. # The TOP LEVEL store paths themselves will never be present in the - # resulting image. At this time (2019-12-16) none of these layers + # resulting image. At this time (2020-06-18) none of these layers # are appropriate to include, as they are all created as # implementation details of dockerTools. closures = [ baseJson contentsEnv ]; @@ -761,6 +761,12 @@ rec { # unless there are more paths than $maxLayers. In that case, create # $maxLayers-1 for the most popular layers, and smush the remainaing # store paths in to one final layer. + # + # The following code is fiddly w.r.t. ensuring every layer is + # created, and that no paths are missed. 
If you change the + # following lines, double-check that your code behaves properly + # when the number of layers equals: + # maxLayers-1, maxLayers, and maxLayers+1, 0 store_layers="$( paths | jq -sR ' From 26402290bfdec5b012dc66c1d0418ad7ffdb284e Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Sun, 21 Jun 2020 11:46:15 +1200 Subject: [PATCH 06/13] stream_layered_image: Add main method --- .../docker/stream_layered_image.py | 121 +++++++++--------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py index 6687f49e1bc..44c6cbba21c 100644 --- a/pkgs/build-support/docker/stream_layered_image.py +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -181,70 +181,73 @@ def add_bytes(tar, path, content, mtime): tar.addfile(ti, io.BytesIO(content)) -# Main +def main(): + with open(sys.argv[1], "r") as f: + conf = json.load(f) -with open(sys.argv[1], "r") as f: - conf = json.load(f) - -created = ( - datetime.now(tz=datetime.timezone.utc) - if conf["created"] == "now" - else datetime.fromisoformat(conf["created"]) -) -mtime = int(created.timestamp()) - -with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: - layers = [] - for num, store_layer in enumerate(conf["store_layers"]): - print( - "Creating layer", num, - "from paths:", store_layer, - file=sys.stderr) - info = add_layer_dir(tar, store_layer, mtime=mtime) - layers.append(info) - - print("Creating the customisation layer...", file=sys.stderr) - layers.append( - add_customisation_layer( - tar, - conf["customisation_layer"], - mtime=mtime - ) + created = ( + datetime.now(tz=datetime.timezone.utc) + if conf["created"] == "now" + else datetime.fromisoformat(conf["created"]) ) + mtime = int(created.timestamp()) - print("Adding manifests...", file=sys.stderr) + with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: + layers = [] + for num, store_layer in enumerate(conf["store_layers"]): + print( + "Creating layer", num, + "from paths:", store_layer, + file=sys.stderr) + info = add_layer_dir(tar, store_layer, mtime=mtime) + layers.append(info) - image_json = { - "created": datetime.isoformat(created), - "architecture": conf["architecture"], - "os": "linux", - "config": conf["config"], - "rootfs": { - "diff_ids": [f"sha256:{layer.checksum}" for layer in layers], - "type": "layers", - }, - "history": [ - { - "created": conf["created"], - "comment": f"store paths: {layer.paths}" - } - for layer in layers - ], - } + print("Creating the customisation layer...", file=sys.stderr) + layers.append( + add_customisation_layer( + tar, + conf["customisation_layer"], + mtime=mtime + ) + ) - image_json = json.dumps(image_json, indent=4).encode("utf-8") - image_json_checksum = hashlib.sha256(image_json).hexdigest() - image_json_path = f"{image_json_checksum}.json" - add_bytes(tar, image_json_path, image_json, mtime=mtime) + print("Adding manifests...", file=sys.stderr) - manifest_json = [ - { - "Config": image_json_path, - "RepoTags": [conf["repo_tag"]], - "Layers": [layer.path for layer in layers], + image_json = { + "created": datetime.isoformat(created), + "architecture": conf["architecture"], + "os": "linux", + "config": conf["config"], + "rootfs": { + "diff_ids": [f"sha256:{layer.checksum}" for layer in layers], + "type": "layers", + }, + "history": [ + { + "created": conf["created"], + "comment": f"store paths: {layer.paths}" + } + for layer in layers + ], } - ] - manifest_json = json.dumps(manifest_json, 
indent=4).encode("utf-8") - add_bytes(tar, "manifest.json", manifest_json, mtime=mtime) - print("Done.", file=sys.stderr) + image_json = json.dumps(image_json, indent=4).encode("utf-8") + image_json_checksum = hashlib.sha256(image_json).hexdigest() + image_json_path = f"{image_json_checksum}.json" + add_bytes(tar, image_json_path, image_json, mtime=mtime) + + manifest_json = [ + { + "Config": image_json_path, + "RepoTags": [conf["repo_tag"]], + "Layers": [layer.path for layer in layers], + } + ] + manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8") + add_bytes(tar, "manifest.json", manifest_json, mtime=mtime) + + print("Done.", file=sys.stderr) + + +if __name__ == "__main__": + main() From f1d7a09146678b60f78b08c2dec3ce14dc59499f Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Sun, 21 Jun 2020 12:09:22 +1200 Subject: [PATCH 07/13] stream_layered_image: Add docstrings to functions --- .../docker/stream_layered_image.py | 64 +++++++++++++++++-- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py index 44c6cbba21c..d7267b25672 100644 --- a/pkgs/build-support/docker/stream_layered_image.py +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -44,8 +44,19 @@ from datetime import datetime from collections import namedtuple -# Adds the given store paths to as a tar to the given writable stream. def archive_paths_to(obj, paths, mtime, add_nix, filter=None): + """ + Writes the given store paths as a tar file to the given stream. + + obj: Stream to write to. Should have a 'write' method. + paths: List of store paths. + add_nix: Whether /nix and /nix/store directories should be + prepended to the archive. + filter: An optional transformation to be applied to TarInfo + objects. Should take a single TarInfo object and return + another one. Defaults to identity. + """ + filter = filter if filter else lambda i: i # gettarinfo makes the paths relative, this makes them @@ -92,9 +103,12 @@ def archive_paths_to(obj, paths, mtime, add_nix, filter=None): tar.addfile(ti) -# A writable stream which only calculates the final file size and -# sha256sum, while discarding the actual contents. class ExtractChecksum: + """ + A writable stream which only calculates the final file size and + sha256sum, while discarding the actual contents. + """ + def __init__(self): self._digest = hashlib.sha256() self._size = 0 @@ -104,6 +118,9 @@ class ExtractChecksum: self._size += len(data) def extract(self): + """ + Returns: Hex-encoded sha256sum and size as a tuple. + """ return (self._digest.hexdigest(), self._size) @@ -111,9 +128,24 @@ class ExtractChecksum: LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"]) -# Given a list of store paths 'paths', creates a layer add append it -# to tarfile 'tar'. Returns some a 'LayerInfo' for the layer. def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): + """ + Appends given store paths to a TarFile object as a new layer. + + tar: 'tarfile.TarFile' object for the new layer to be added to. + paths: List of store paths. + mtime: 'mtime' of the added files and the layer tarball. + Should be an integer representing a POSIX time. + add_nix: Whether /nix and /nix/store directories should be + added to a layer. + filter: An optional transformation to be applied to TarInfo + objects inside the layer. Should take a single TarInfo + object and return another one. Defaults to identity. 
+ + Returns: A 'LayerInfo' object containing some metadata of + the layer added. + """ + assert all(i.startswith("/nix/store/") for i in paths) # First, calculate the tarball checksum and the size. @@ -157,8 +189,19 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): return LayerInfo(size=size, checksum=checksum, path=path, paths=paths) -# Adds the contents of the store path to the root as a new layer. def add_customisation_layer(tar, path, mtime): + """ + Adds the contents of the store path as a new layer. This is different + than the 'add_layer_dir' function defaults in the sense that the contents + of a single store path will be added to the root of the layer. eg (without + the /nix/store prefix). + + tar: 'tarfile.TarFile' object for the new layer to be added to. + path: A store path. + mtime: 'mtime' of the added files and the layer tarball. Should be an + integer representing a POSIX time. + """ + def filter(ti): ti.name = re.sub("^/nix/store/[^/]*", "", ti.name) return ti @@ -171,8 +214,15 @@ def add_customisation_layer(tar, path, mtime): ) -# Adds a file to the tarball with given path and contents. def add_bytes(tar, path, content, mtime): + """ + Adds a file to the tarball with given path and contents. + + tar: 'tarfile.TarFile' object. + path: Path of the file as a string. + content: Contents of the file. + mtime: 'mtime' of the file. Should be an integer representing a POSIX time. + """ assert type(content) is bytes ti = tarfile.TarInfo(path) From 4bff5b7f3d2bfd1705a10ee7f85d2494ac7bac9f Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Sun, 21 Jun 2020 12:11:56 +1200 Subject: [PATCH 08/13] dockerTools: Properly quote a shell variable --- pkgs/build-support/docker/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 11d5c2b3642..7ff325382a6 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -333,7 +333,7 @@ rec { chmod ug+w layer - if [[ -n $extraCommands ]]; then + if [[ -n "$extraCommands" ]]; then (cd layer; eval "$extraCommands") fi From 87a538e149cbf41361afda3b417eac1632f00c5b Mon Sep 17 00:00:00 2001 From: Utku Demir Date: Sun, 21 Jun 2020 12:14:52 +1200 Subject: [PATCH 09/13] stream_layered_image: Use more descriptive variable name --- pkgs/build-support/docker/stream_layered_image.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py index d7267b25672..bbad5f5023c 100644 --- a/pkgs/build-support/docker/stream_layered_image.py +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -160,9 +160,9 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): (checksum, size) = extract_checksum.extract() path = f"{checksum}/layer.tar" - ti = tarfile.TarInfo(path) - ti.size = size - ti.mtime = mtime + layer_tarinfo = tarfile.TarInfo(path) + layer_tarinfo.size = size + layer_tarinfo.mtime = mtime # Then actually stream the contents to the outer tarball. read_fd, write_fd = os.pipe() @@ -184,7 +184,7 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None): # exception handler, and the 'addfile' call will fail since it # won't be able to read required amount of bytes. 
 threading.Thread(target=producer).start()
-        tar.addfile(ti, read)
+        tar.addfile(layer_tarinfo, read)
 
     return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)

From fe71abfc12ede6700f599a08c7528fe66461486f Mon Sep 17 00:00:00 2001
From: Utku Demir
Date: Sun, 21 Jun 2020 12:18:28 +1200
Subject: [PATCH 10/13] stream_layered_image: Clarify assertion failure

---
 pkgs/build-support/docker/stream_layered_image.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
index bbad5f5023c..5d251e8bfb0 100644
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -146,7 +146,9 @@ def add_layer_dir(tar, paths, mtime, add_nix=True, filter=None):
       the layer added.
     """
 
-    assert all(i.startswith("/nix/store/") for i in paths)
+    invalid_paths = [i for i in paths if not i.startswith("/nix/store/")]
+    assert len(invalid_paths) == 0, \
+        f"Expecting absolute store paths, but got: {invalid_paths}"
 
     # First, calculate the tarball checksum and the size.
     extract_checksum = ExtractChecksum()

From 16199521eaaf5d31952c19e44bcca205959df57f Mon Sep 17 00:00:00 2001
From: Utku Demir
Date: Sun, 21 Jun 2020 12:23:55 +1200
Subject: [PATCH 11/13] stream_layered_image: Always set uid and gid to root

---
 pkgs/build-support/docker/stream_layered_image.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
index 5d251e8bfb0..a9de10f9efa 100644
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -67,6 +67,10 @@ def archive_paths_to(obj, paths, mtime, add_nix, filter=None):
 
     def apply_filters(ti):
         ti.mtime = mtime
+        ti.uid = 0
+        ti.gid = 0
+        ti.uname = "root"
+        ti.gname = "root"
         return filter(ti)
 
     def dir(path):

From fa8f2bf34f3f065571670c59217f877a73868f8f Mon Sep 17 00:00:00 2001
From: Utku Demir
Date: Mon, 22 Jun 2020 15:11:04 +1200
Subject: [PATCH 12/13] stream_layered_image: Use pathlib module for directory traversal

---
 .../docker/stream_layered_image.py            | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py
index a9de10f9efa..4348513338d 100644
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -37,8 +37,8 @@ import re
 import sys
 import json
 import hashlib
+import pathlib
 import tarfile
-import itertools
 import threading
 from datetime import datetime
 from collections import namedtuple
@@ -90,21 +90,20 @@ def archive_paths_to(obj, paths, mtime, add_nix, filter=None):
             ti = tar.gettarinfo(os.path.join("/", path))
             tar.addfile(apply_filters(append_root(ti)))
 
-            for root, dirs, files in os.walk(path, topdown=True):
-                for name in itertools.chain(dirs, files):
-                    name = os.path.join(root, name)
-                    ti = append_root(tar.gettarinfo(name))
+            for filename in pathlib.Path(path).rglob("*"):
+                ti = append_root(tar.gettarinfo(filename))
 
-                    # copy hardlinks as regular files
-                    if ti.islnk():
-                        ti.type = tarfile.REGTYPE
+                # copy hardlinks as regular files
+                if ti.islnk():
+                    ti.type = tarfile.REGTYPE
+                    ti.linkname = ""
 
-                    ti = apply_filters(ti)
-                    if ti.isfile():
-                        with open(name, "rb") as f:
-                            tar.addfile(ti, f)
-                    else:
-                        tar.addfile(ti)
+                ti = apply_filters(ti)
+                if ti.isfile():
+                    with open(filename, "rb") 
as f: + tar.addfile(ti, f) + else: + tar.addfile(ti) class ExtractChecksum: From c9e6a02c822936b2d9156151031f40d6c2e5435d Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Mon, 22 Jun 2020 15:11:46 +1200 Subject: [PATCH 13/13] Add docs for dockerTools.streamLayeredImage --- doc/builders/images/dockertools.xml | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/builders/images/dockertools.xml b/doc/builders/images/dockertools.xml index e7f37fdaaf0..126698d0a9e 100644 --- a/doc/builders/images/dockertools.xml +++ b/doc/builders/images/dockertools.xml @@ -166,7 +166,7 @@ hello latest de2bf4786de6 About a minute ago 25.2MB buildLayeredImage - Create a Docker image with many of the store paths being on their own layer to improve sharing between images. + Create a Docker image with many of the store paths being on their own layer to improve sharing between images. The image is realized into the Nix store as a gzipped tarball. Depending on the intended usage, many users might prefer to use streamLayeredImage instead, which this function uses internally. @@ -327,6 +327,27 @@ pkgs.dockerTools.buildLayeredImage { +
streamLayeredImage
+
+
+   Builds a script which, when run, will stream an uncompressed tarball of a Docker image to stdout. The arguments to this function are as for buildLayeredImage. This method of constructing an image does not realize the image into the Nix store, so it saves on IO and disk/cache space, particularly with large images.
+
+
+   The image produced by running the output script can be piped directly into docker load, to load it into the local docker daemon:
+
+$(nix-build) | docker load
+
+
+   Alternatively, the image can be piped via gzip into skopeo, e.g. to copy it into a registry:
+
+$(nix-build) | gzip --fast | skopeo copy docker-archive:/dev/stdin docker://localhost:5000/myimage:tag
+
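+
+   For example, a minimal invocation of this function might look like the following sketch (the hello package and the name and tag chosen here are purely illustrative):
+
+pkgs.dockerTools.streamLayeredImage {
+  name = "hello";
+  tag = "latest";
+  contents = [ pkgs.hello ];
+  config.Cmd = [ "/bin/hello" ];
+}
+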
+
pullImage