diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 83f4a9e0c01..d0ec90264ee 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -11,6 +11,7 @@ jq, jshon, lib, + makeWrapper, moreutils, nix, pigz, @@ -29,6 +30,7 @@ writeReferencesToFile, writeScript, writeText, + writePython3, }: # WARNING: this API is unstable and may be subject to backwards-incompatible changes in the future. @@ -204,24 +206,17 @@ rec { mkdir image tar -C image -xpf "$fromImage" - # If the image name isn't set, read it from the image repository json. - if [[ -z "$fromImageName" ]]; then - fromImageName=$(jshon -k < image/repositories | head -n 1) - echo "From-image name wasn't set. Read $fromImageName." + if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then + parentID="$( + cat "image/manifest.json" | + jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \ + --arg desiredTag "$fromImageName:$fromImageTag" + )" + else + echo "From-image name or tag wasn't set. Reading the first ID." + parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')" fi - # If the tag isn't set, use the name as an index into the json - # and read the first key found. - if [[ -z "$fromImageTag" ]]; then - fromImageTag=$(jshon -e $fromImageName -k < image/repositories \ - | head -n1) - echo "From-image tag wasn't set. Read $fromImageTag." - fi - - # Use the name and tag to get the parent ID field. - parentID=$(jshon -e $fromImageName -e $fromImageTag -u \ - < image/repositories) - cat ./image/manifest.json | jq -r '.[0].Layers | .[]' > layer-list else touch layer-list @@ -305,106 +300,6 @@ rec { ${text} ''; - # Create $maxLayers worth of Docker Layers, one layer per store path - # unless there are more paths than $maxLayers. In that case, create - # $maxLayers-1 for the most popular layers, and smush the remainaing - # store paths in to one final layer. - # - # NOTE: the `closures` parameter is a list of closures to include. - # The TOP LEVEL store paths themselves will never be present in the - # resulting image. At this time (2019-12-16) none of these layers - # are appropriate to include, as they are all created as - # implementation details of dockerTools. - mkManyPureLayers = { - name, - # Files to add to the layer. - closures, - configJson, - # Docker has a 125-layer maximum, we pick 100 to ensure there is - # plenty of room for extension. - # https://github.com/moby/moby/blob/b3e9f7b13b0f0c414fa6253e1f17a86b2cff68b5/layer/layer_store.go#L23-L26 - maxLayers ? 100 - }: - let - storePathToLayer = substituteAll - { shell = runtimeShell; - isExecutable = true; - src = ./store-path-to-layer.sh; - }; - - overallClosure = writeText "closure" (lib.concatStringsSep " " closures); - in - runCommand "${name}-granular-docker-layers" { - inherit maxLayers; - paths = referencesByPopularity overallClosure; - nativeBuildInputs = [ jshon rsync tarsum moreutils ]; - enableParallelBuilding = true; - } - '' - mkdir layers - - # Delete impurities for store path layers, so they don't get - # shared and taint other projects. - cat ${configJson} \ - | jshon -d config \ - | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json - - # WARNING! - # The following code is fiddly w.r.t. ensuring every layer is - # created, and that no paths are missed. 
If you change the - # following head and tail call lines, double-check that your - # code behaves properly when the number of layers equals: - # maxLayers-1, maxLayers, and maxLayers+1, 0 - paths() { - cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])} - } - - paths | head -n $((maxLayers - 1)) | cat -n | xargs -r -P$NIX_BUILD_CORES -n2 ${storePathToLayer} - if [ $(paths | wc -l) -ge $maxLayers ]; then - paths | tail -n+$maxLayers | xargs ${storePathToLayer} $maxLayers - fi - - echo "Finished building layer '$name'" - - mv ./layers $out - ''; - - # Create a "Customisation" layer which adds symlinks at the root of - # the image to the root paths of the closure. Also add the config - # data like what command to run and the environment to run it in. - mkCustomisationLayer = { - name, - # Files to add to the layer. - contents, - baseJson, - extraCommands, - uid ? 0, gid ? 0, - }: - runCommand "${name}-customisation-layer" { - nativeBuildInputs = [ jshon rsync tarsum ]; - inherit extraCommands; - } - '' - cp -r ${contents}/ ./layer - - if [[ -n $extraCommands ]]; then - chmod ug+w layer - (cd layer; eval "$extraCommands") - fi - - # Tar up the layer and throw it into 'layer.tar', while calculating its checksum. - echo "Packing layer..." - mkdir $out - tarhash=$(tar --transform='s|^\./||' -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf - . | tee $out/layer.tar | tarsum) - - # Add a 'checksum' field to the JSON, with the value set to the - # checksum of the tarball. - cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json - - # Indicate to docker that we're using schema version 1.0. - echo -n "1.0" > $out/VERSION - ''; - # Create a "layer" (set of files). mkPureLayer = { # Name of the layer @@ -541,131 +436,14 @@ rec { ''; }; - buildLayeredImage = { - # Image Name - name, - # Image tag, the Nix's output hash will be used if null - tag ? null, - # Files to put on the image (a nix store path or list of paths). - contents ? [], - # Docker config; e.g. what command to run on the container. - config ? {}, - # Time of creation of the image. Passing "now" will make the - # created date be the time of building. - created ? "1970-01-01T00:00:01Z", - # Optional bash script to run on the files prior to fixturizing the layer. - extraCommands ? "", uid ? 0, gid ? 0, - # We pick 100 to ensure there is plenty of room for extension. I - # believe the actual maximum is 128. - maxLayers ? 100 - }: - assert - (lib.assertMsg (maxLayers > 1) - "the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})"); + buildLayeredImage = {name, ...}@args: let - baseName = baseNameOf name; - contentsEnv = symlinkJoin { - name = "bulk-layers"; - paths = if builtins.isList contents - then contents - else [ contents ]; - }; - - configJson = let - pure = writeText "${baseName}-config.json" (builtins.toJSON { - inherit created config; - architecture = buildPackages.go.GOARCH; - os = "linux"; - }); - impure = runCommand "${baseName}-standard-dynamic-date.json" - { nativeBuildInputs = [ jq ]; } - '' - jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out - ''; - in if created == "now" then impure else pure; - - bulkLayers = mkManyPureLayers { - name = baseName; - closures = [ contentsEnv configJson ]; - # One layer will be taken up by the customisationLayer, so - # take up one less. 
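For context on the tar flags in the helpers removed above: --sort=name, --mtime="@$SOURCE_DATE_EPOCH" and the fixed owner/group are what keep each layer tarball bit-for-bit reproducible. With Python's tarfile module, which the new stream_layered_image.py relies on, the same normalisation is conventionally written as a TarInfo filter. The sketch below is illustrative only and is not part of this change; the function name is made up, and it could in principle be passed as the filter= argument that archive_paths_to() in the new script accepts.

import os
import tarfile


def normalize(ti: tarfile.TarInfo) -> tarfile.TarInfo:
    """Drop everything that can differ between build machines, mirroring
    the GNU tar flags used by the shell helpers above."""
    ti.mtime = int(os.environ.get("SOURCE_DATE_EPOCH", "0"))
    ti.uid = ti.gid = 0
    ti.uname = ti.gname = ""
    return ti

# usage (hypothetical): tarfile.TarFile.add(path, filter=normalize), or the
# filter= argument of archive_paths_to() in the new script further down.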
- maxLayers = maxLayers - 1; - inherit configJson; - }; - customisationLayer = mkCustomisationLayer { - name = baseName; - contents = contentsEnv; - baseJson = configJson; - inherit uid gid extraCommands; - }; - result = runCommand "docker-image-${baseName}.tar.gz" { - nativeBuildInputs = [ jshon pigz coreutils findutils jq ]; - # Image name and tag must be lowercase - imageName = lib.toLower name; - baseJson = configJson; - passthru.imageTag = - if tag == null - then lib.head (lib.splitString "-" (lib.last (lib.splitString "/" result))) - else lib.toLower tag; - # Docker can't be made to run darwin binaries - meta.badPlatforms = lib.platforms.darwin; - } '' - ${if (tag == null) then '' - outName="$(basename "$out")" - outHash=$(echo "$outName" | cut -d - -f 1) - - imageTag=$outHash - '' else '' - imageTag="${tag}" - ''} - - find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list - echo ${customisationLayer} >> layer-list - - mkdir image - imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}") - manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]") - for layer in $(cat layer-list); do - layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1) - layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1) - ln -s "$layer" "./image/$layerID" - - manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= . + [\"$layerID/layer.tar\"]") - imageJson=$(echo "$imageJson" | jq ".history |= . + [{\"created\": \"$(jq -r .created ${configJson})\"}]") - imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= . + [\"sha256:$layerChecksum\"]") - done - imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1) - echo "$imageJson" > "image/$imageJsonChecksum.json" - manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"") - echo "$manifestJson" > image/manifest.json - - jshon -n object \ - -n object -s "$layerID" -i "$imageTag" \ - -i "$imageName" > image/repositories - - echo "Cooking the image..." - # tar exits with an exit code of 1 if files changed while it was - # reading them. It considers a change in the number of hard links - # to be a "change", which can cause this to fail if images are being - # built concurrently and the auto-optimise-store nix option is turned on. - # Since the contents of these files will not change, we can reasonably - # ignore this exit code. - set +e - tar -C image --dereference --hard-dereference --sort=name \ - --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 \ - --mode=a-w --xform s:'^./':: --use-compress-program='pigz -nT' \ - --warning=no-file-changed -cf $out . - RET=$? - if [ $RET -ne 0 ] && [ $RET -ne 1 ]; then - exit $RET - fi - set -e - - echo "Finished." - ''; - + stream = streamLayeredImage args; in - result; + runCommand "${name}.tar.gz" { + inherit (stream) imageName; + buildInputs = [ pigz ]; + } "${stream} | pigz -nT > $out"; # 1. extract the base image # 2. create the layer @@ -774,20 +552,22 @@ rec { configName="$(cat ./image/manifest.json | jq -r '.[0].Config')" baseEnvs="$(cat "./image/$configName" | jq '.config.Env // []')" + # Extract the parentID from the manifest + if [[ -n "$fromImageName" ]] && [[ -n "$fromImageTag" ]]; then + parentID="$( + cat "image/manifest.json" | + jq -r '.[] | select(.RepoTags | contains([$desiredTag])) | rtrimstr(".json")' \ + --arg desiredTag "$fromImageName:$fromImageTag" + )" + else + echo "From-image name or tag wasn't set. Reading the first ID." 
+ parentID="$(cat "image/manifest.json" | jq -r '.[0].Config | rtrimstr(".json")')" + fi + # Otherwise do not import the base image configuration and manifest chmod a+w image image/*.json rm -f image/*.json - if [[ -z "$fromImageName" ]]; then - fromImageName=$(jshon -k < image/repositories|head -n1) - fi - if [[ -z "$fromImageTag" ]]; then - fromImageTag=$(jshon -e $fromImageName -k \ - < image/repositories|head -n1) - fi - parentID=$(jshon -e $fromImageName -e $fromImageTag -u \ - < image/repositories) - for l in image/*/layer.tar; do ls_tar $l >> baseFiles done @@ -904,4 +684,110 @@ rec { }) ); + streamLayeredImage = { + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Docker config; e.g. what command to run on the container. + config ? {}, + # Time of creation of the image. Passing "now" will make the + # created date be the time of building. + created ? "1970-01-01T00:00:01Z", + # Optional bash script to run on the files prior to fixturizing the layer. + extraCommands ? "", + # We pick 100 to ensure there is plenty of room for extension. I + # believe the actual maximum is 128. + maxLayers ? 100 + }: + assert + (lib.assertMsg (maxLayers > 1) + "the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})"); + let + streamScript = writePython3 "stream" {} ./stream_layered_image.py; + baseJson = writeText "${name}-base.json" (builtins.toJSON { + inherit config; + architecture = buildPackages.go.GOARCH; + os = "linux"; + }); + customisationLayer = runCommand "${name}-customisation-layer" { inherit extraCommands; } '' + cp -r ${contentsEnv}/ $out + + if [[ -n $extraCommands ]]; then + chmod u+w $out + (cd $out; eval "$extraCommands") + fi + ''; + contentsEnv = symlinkJoin { + name = "${name}-bulk-layers"; + paths = if builtins.isList contents + then contents + else [ contents ]; + }; + + # NOTE: the `closures` parameter is a list of closures to include. + # The TOP LEVEL store paths themselves will never be present in the + # resulting image. At this time (2019-12-16) none of these layers + # are appropriate to include, as they are all created as + # implementation details of dockerTools. + closures = [ baseJson contentsEnv ]; + overallClosure = writeText "closure" (lib.concatStringsSep " " closures); + conf = runCommand "${name}-conf.json" { + inherit maxLayers created; + imageName = lib.toLower name; + paths = referencesByPopularity overallClosure; + buildInputs = [ jq ]; + } '' + paths() { + cat $paths ${lib.concatMapStringsSep " " (path: "| (grep -v ${path} || true)") (closures ++ [ overallClosure ])} + } + ${if (tag == null) then '' + outName="$(basename "$out")" + outHash=$(echo "$outName" | cut -d - -f 1) + + imageTag=$outHash + '' else '' + imageTag="${tag}" + ''} + + if [[ "$created" == "now" ]]; then + created="$(TZ=utc date --iso-8601="seconds")" + fi + + # Create $maxLayers worth of Docker Layers, one layer per store path + # unless there are more paths than $maxLayers. In that case, create + # $maxLayers-1 for the most popular layers, and smush the remainaing + # store paths in to one final layer. 
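The jq expression just below implements this split. For reference, the same rule written out in Python; this is illustrative only and not used by the build. Here the budget corresponds to maxLayers - 1, since one layer is reserved for the customisation layer.

def group_store_paths(paths, budget):
    """Layer-budget rule: the first budget - 1 paths (already ordered by
    popularity) each get a layer of their own; whatever is left over is
    collapsed into one final catch-all layer."""
    singletons = [[p] for p in paths[:budget - 1]]
    rest = paths[budget - 1:]
    return singletons + ([rest] if rest else [])


# group_store_paths(["a", "b", "c", "d"], 3) == [["a"], ["b"], ["c", "d"]]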
+ store_layers="$( + paths | + jq -sR ' + rtrimstr("\n") | split("\n") + | (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ] + | map(select(length > 0)) + ' \ + --argjson maxLayers "$(( maxLayers - 1 ))" # one layer will be taken up by the customisation layer + )" + + cat ${baseJson} | jq ' + . + { + "store_layers": $store_layers, + "customisation_layer", $customisation_layer, + "repo_tag": $repo_tag, + "created": $created + } + ' --argjson store_layers "$store_layers" \ + --arg customisation_layer ${customisationLayer} \ + --arg repo_tag "$imageName:$imageTag" \ + --arg created "$created" | + tee $out + ''; + result = runCommand "stream-${name}" { + inherit (conf) imageName; + buildInputs = [ makeWrapper ]; + } '' + makeWrapper ${streamScript} $out --add-flags ${conf} + ''; + in result; } diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh deleted file mode 100755 index 3a1fcd0c27a..00000000000 --- a/pkgs/build-support/docker/store-path-to-layer.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!@shell@ - -set -eu - -layerNumber=$1 -shift - -layerPath="./layers/$layerNumber" -echo "Creating layer #$layerNumber for $@" - -mkdir -p "$layerPath" - -# Make sure /nix and /nix/store appear first in the archive. -# -# We create the directories here and use them because -# when there are other things being added to the -# nix store, tar could fail, saying, -# "tar: /nix/store: file changed as we read it" -# -# In addition, we use `__Nix__` instead of `nix` to avoid renaming -# relative symlink destinations like -# /nix/store/...-nix-2.3.4/bin/nix-daemon -> nix -mkdir -p __Nix__/store - -# Then we change into the /nix/store in order to -# avoid a similar "file changed as we read it" error -# as above. Namely, if we use the absolute path of -# /nix/store/123-pkg and something new is added to the nix -# store while tar is running, it will detect a change to -# /nix/store and fail. Instead, if we cd into the nix store -# and copy the relative nix store path, tar will ignore -# changes to /nix/store. In order to create the correct -# structure in the tar file, we transform the relative nix -# store path to the absolute store path. -tarhash=$( - basename -a "$@" | - tar --create --preserve-permissions --absolute-names nix \ - --directory /nix/store --verbatim-files-from --files-from - \ - --hard-dereference --sort=name \ - --mtime="@$SOURCE_DATE_EPOCH" \ - --owner=0 --group=0 \ - --transform 's,^__Nix__$,/nix,' \ - --transform 's,^__Nix__/store$,/nix/store,' \ - --transform 's,^[^/],/nix/store/\0,rS' | - tee "$layerPath/layer.tar" | - tarsum -) - -# Add a 'checksum' field to the JSON, with the value set to the -# checksum of the tarball. -cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json - -# Indicate to docker that we're using schema version 1.0. -echo -n "1.0" > $layerPath/VERSION diff --git a/pkgs/build-support/docker/stream_layered_image.py b/pkgs/build-support/docker/stream_layered_image.py new file mode 100644 index 00000000000..a0f2cf726e3 --- /dev/null +++ b/pkgs/build-support/docker/stream_layered_image.py @@ -0,0 +1,165 @@ +import io +import os +import re +import sys +import json +import hashlib +import tarfile +import itertools +import threading +from collections import namedtuple + + +# Adds the given store paths to as a tar to the given writable stream. 
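# For orientation (illustrative only, not used by the script): the
# configuration JSON passed as sys.argv[1], generated by the Nix `conf`
# derivation above, has roughly the following shape. All concrete values
# here are made up for the example.
example_conf = {
    "config": {"Cmd": ["/nix/store/aaaa-hello/bin/hello"]},
    "architecture": "amd64",
    "os": "linux",
    "created": "1970-01-01T00:00:01Z",
    "repo_tag": "hello:latest",
    # one inner list per layer, most popular store paths first
    "store_layers": [
        ["/nix/store/aaaa-hello"],
        ["/nix/store/bbbb-glibc", "/nix/store/cccc-libidn2"],
    ],
    # store path whose contents become the final layer
    "customisation_layer": "/nix/store/dddd-customisation-layer",
}
# Each inner list of "store_layers" is handed to add_layer_dir() further down,
# which drives archive_paths_to() defined next.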
+def archive_paths_to(obj, paths, add_nix, filter=None):
+    filter = filter if filter else lambda i: i
+
+    # gettarinfo makes the paths relative; this makes them
+    # absolute again
+    def append_root(ti):
+        ti.name = "/" + ti.name
+        return ti
+
+    def dir(path):
+        ti = tarfile.TarInfo(path)
+        ti.type = tarfile.DIRTYPE
+        return ti
+
+    with tarfile.open(fileobj=obj, mode="w|") as tar:
+        if add_nix:
+            tar.addfile(dir("/nix"))
+            tar.addfile(dir("/nix/store"))
+
+        for path in paths:
+            ti = tar.gettarinfo(os.path.join("/", path))
+            tar.addfile(filter(append_root(ti)))
+
+            for root, dirs, files in os.walk(path, topdown=True):
+                for name in itertools.chain(dirs, files):
+                    name = os.path.join(root, name)
+                    ti = append_root(tar.gettarinfo(name))
+
+                    # copy hardlinks as regular files
+                    if ti.islnk():
+                        ti.type = tarfile.REGTYPE
+
+                    ti = filter(ti)
+                    if ti.isfile():
+                        with open(name, "rb") as f:
+                            tar.addfile(ti, f)
+                    else:
+                        tar.addfile(ti)
+
+
+# A writable stream which only calculates the final file size and
+# sha256sum, while discarding the actual contents.
+class ExtractChecksum:
+    def __init__(self):
+        self._digest = hashlib.sha256()
+        self._size = 0
+
+    def write(self, data):
+        self._digest.update(data)
+        self._size += len(data)
+
+    def extract(self):
+        return (self._digest.hexdigest(), self._size)
+
+
+# Some metadata for a layer
+LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
+
+
+# Given a list of store paths 'paths', creates a layer and appends it
+# to tarfile 'tar'. Returns a 'LayerInfo' for the layer.
+def add_layer_dir(tar, paths, add_nix=True, filter=None):
+    assert all(i.startswith("/nix/store/") for i in paths)
+
+    extract_checksum = ExtractChecksum()
+    archive_paths_to(extract_checksum, paths, add_nix=add_nix, filter=filter)
+    (checksum, size) = extract_checksum.extract()
+
+    path = f"{checksum}/layer.tar"
+    ti = tarfile.TarInfo(path)
+    ti.size = size
+
+    read_fd, write_fd = os.pipe()
+    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
+        def producer():
+            archive_paths_to(write, paths, add_nix=add_nix, filter=filter)
+            write.close()
+        threading.Thread(target=producer).start()
+        tar.addfile(ti, read)
+
+    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
+
+
+def add_customisation_layer(tar, path):
+    def filter(ti):
+        ti.name = re.sub("^/nix/store/[^/]*", "", ti.name)
+        return ti
+    return add_layer_dir(tar, [path], add_nix=False, filter=filter)
+
+
+# Adds a file to the tarball with given path and contents.
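# A note on add_layer_dir() above, shown here as a self-contained sketch
# that is not part of this script: tarfile.addfile() needs a member's size
# before any bytes are written, so the nested layer.tar is generated twice.
# The first pass goes into a checksum/size sink, and the second pass goes
# through an os.pipe() fed by a producer thread, which lets the bytes stream
# into the outer archive without being held in memory or written to disk.
import hashlib
import io
import os
import tarfile
import threading


def produce(stream):
    # stand-in for archive_paths_to(); it must emit identical bytes both times
    stream.write(b"hello layer contents\n")


class Sink:
    # a minimal ExtractChecksum: count and hash the bytes, discard the data
    def __init__(self):
        self.digest = hashlib.sha256()
        self.size = 0

    def write(self, data):
        self.digest.update(data)
        self.size += len(data)


buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w|") as outer:
    sink = Sink()
    produce(sink)                                  # pass 1: size and checksum

    ti = tarfile.TarInfo(f"{sink.digest.hexdigest()}/layer.tar")
    ti.size = sink.size

    read_fd, write_fd = os.pipe()
    with open(read_fd, "rb") as reader, open(write_fd, "wb") as writer:
        t = threading.Thread(target=lambda: (produce(writer), writer.close()))
        t.start()                                  # pass 2: stream the bytes
        outer.addfile(ti, reader)                  # reads exactly ti.size bytes
        t.join()

# add_bytes() defined next writes an in-memory blob (the image configuration
# and manifest JSON documents) straight into the image tarball.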
+def add_bytes(tar, path, content): + assert type(content) is bytes + + ti = tarfile.TarInfo(path) + ti.size = len(content) + tar.addfile(ti, io.BytesIO(content)) + + +# Main + +with open(sys.argv[1], "r") as f: + conf = json.load(f) + +with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar: + layers = [] + for num, store_layer in enumerate(conf["store_layers"]): + print( + "Creating layer", num, + "from paths:", store_layer, + file=sys.stderr) + info = add_layer_dir(tar, store_layer) + layers.append(info) + + print("Creating the customisation layer...", file=sys.stderr) + layers.append(add_customisation_layer(tar, conf["customisation_layer"])) + + print("Adding manifests...", file=sys.stderr) + image_json = { + "created": conf["created"], + "architecture": conf["architecture"], + "os": "linux", + "config": conf["config"], + "rootfs": { + "diff_ids": [f"sha256:{layer.checksum}" for layer in layers], + "type": "layers", + }, + "history": [ + { + "created": conf["created"], + "comment": f"store paths: {layer.paths}" + } + for layer in layers + ], + } + + image_json = json.dumps(image_json, indent=4).encode("utf-8") + image_json_checksum = hashlib.sha256(image_json).hexdigest() + image_json_path = f"{image_json_checksum}.json" + add_bytes(tar, image_json_path, image_json) + + manifest_json = [ + { + "Config": image_json_path, + "RepoTags": [conf["repo_tag"]], + "Layers": [layer.path for layer in layers], + } + ] + manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8") + add_bytes(tar, "manifest.json", manifest_json) + + print("Done.", file=sys.stderr) diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index e31c42261e7..fa68b749363 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -213,7 +213,9 @@ in grsync = callPackage ../applications/misc/grsync { }; - dockerTools = callPackage ../build-support/docker { }; + dockerTools = callPackage ../build-support/docker { + writePython3 = writers.writePython3; + }; snapTools = callPackage ../build-support/snap { };
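The wrapper produced by streamLayeredImage writes an uncompressed Docker image archive (the format `docker load` expects) to stdout; buildLayeredImage above simply pipes it through pigz. Piping the wrapper's output into `docker load` is the intended use. Below is a small, hedged sketch of inspecting the stream from Python; the ./result path and the in-memory capture are assumptions made for illustration and only suit small test images.

import io
import json
import subprocess
import tarfile

# Run the streamLayeredImage wrapper (./result being the nix-build symlink
# is an assumption) and capture the tarball it streams to stdout.
image = subprocess.run(["./result"], check=True, stdout=subprocess.PIPE).stdout

# The archive contains manifest.json, one <hash>.json config blob, and one
# <checksum>/layer.tar per layer, exactly as stream_layered_image.py emits.
with tarfile.open(fileobj=io.BytesIO(image)) as tar:
    manifest = json.load(tar.extractfile("manifest.json"))
    print(manifest[0]["RepoTags"], "with", len(manifest[0]["Layers"]), "layers")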