diff --git a/doc/functions.xml b/doc/functions.xml index 0c0d82b0342c..c4cbf0adb203 100644 --- a/doc/functions.xml +++ b/doc/functions.xml @@ -682,6 +682,183 @@ hello latest de2bf4786de6 About a minute ago 25.2MB +
+ buildLayeredImage + + + Create a Docker image with many of the store paths being on their own layer + to improve sharing between images. + + + + + + name + + + + The name of the resulting image. + + + + + + tag optional + + + + Tag of the generated image. + + + Default: the output path's hash + + + + + + contents optional + + + + Top level paths in the container. Either a single derivation, or a list + of derivations. + + + Default: [] + + + + + + config optional + + + + Run-time configuration of the container. A full list of the options is + available in the + + Docker Image Specification v1.2.0 . + + + Default: {} + + + + + + created optional + + + + Date and time the layers were created. Follows the same + now exception supported by + buildImage. + + + Default: 1970-01-01T00:00:01Z + + + + + + maxLayers optional + + + + Maximum number of layers to create. + + + Default: 24 + + + + + +
+ Behavior of <varname>contents</varname> in the final image + + + Each path directly listed in contents will have a + symlink in the root of the image. + + + + For example: + + will create symlinks for all the paths in the hello + package: + +/bin/hello -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello +/share/info/hello.info -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/hello.info +/share/locale/bg/LC_MESSAGES/hello.mo -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/hello.mo +]]> + +
+ +
+ Automatic inclusion of <varname>config</varname> references + + + The closure of config is automatically included in the + closure of the final image. + + + + This is different from pkgs.dockerTools.buildImage + which does not automatically include the + configuration's closure. + + + + This allows you to make very simple Docker images with very little code. + This container will start up and run hello: + + +
+ +
+ Adjusting <varname>maxLayers</varname> + + + Increasing the maxLayers increases the number of layers + which have a chance to be shared between different images. + + + + Modern Docker installations support up to 128 layers; however, older + versions support as few as 42. + + + + If the produced image will not be extended by other Docker builds, it is + safe to set maxLayers to 128. + However, it will be impossible to extend the image further. + + + + The first (maxLayers-2) most "popular" paths will have + their own individual layers, then layer #maxLayers-1 + will contain all the remaining "unpopular" paths, and finally layer + #maxLayers will contain the Image configuration. + + + + Docker's Layers are not inherently ordered; they are content-addressable + and are not explicitly layered until they are composed into an Image. + +
+
+
pullImage diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 6b5a06486e79..73639a521b6a 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -1,4 +1,5 @@ { + symlinkJoin, coreutils, docker, e2fsprogs, @@ -19,6 +20,7 @@ utillinux, vmTools, writeReferencesToFile, + referencesByPopularity, writeScript, writeText, }: @@ -272,6 +274,81 @@ rec { perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths ''; + # Create $maxLayers worth of Docker Layers, one layer per store path + # unless there are more paths than $maxLayers. In that case, create + # $maxLayers-1 for the most popular layers, and smush the remaining + # store paths into one final layer. + mkManyPureLayers = { + name, + # Files to add to the layer. + closure, + configJson, + # Docker has a 42-layer maximum, we pick 24 to ensure there is plenty + # of room for extension + maxLayers ? 24 + }: + runCommand "${name}-granular-docker-layers" { + inherit maxLayers; + paths = referencesByPopularity closure; + buildInputs = [ jshon rsync tarsum ]; + enableParallelBuilding = true; + } + '' + # Delete impurities for store path layers, so they don't get + # shared and taint other projects. + cat ${configJson} \ + | jshon -d config \ + | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json + + # WARNING! + # The following code is fiddly w.r.t. ensuring every layer is + # created, and that no paths are missed. 
If you change the + # following head and tail call lines, double-check that your + # code behaves properly when the number of layers equals: + # maxLayers-1, maxLayers, and maxLayers+1 + head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh} + if [ $(cat $paths | wc -l) -ge $maxLayers ]; then + tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers + fi + + echo "Finished building layer '$name'" + + mv ./layers $out + ''; + + # Create a "Customisation" layer which adds symlinks at the root of + # the image to the root paths of the closure. Also add the config + # data like what command to run and the environment to run it in. + mkCustomisationLayer = { + name, + # Files to add to the layer. + contents, + baseJson, + uid ? 0, gid ? 0, + }: + runCommand "${name}-customisation-layer" { + buildInputs = [ jshon rsync tarsum ]; + } + '' + cp -r ${contents}/ ./layer + + # Tar up the layer and throw it into 'layer.tar'. + echo "Packing layer..." + mkdir $out + tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar . + + # Compute a checksum of the tarball. + echo "Computing layer checksum..." + tarhash=$(tarsum < $out/layer.tar) + + # Add a 'checksum' field to the JSON, with the value set to the + # checksum of the tarball. + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json + + # Indicate to docker that we're using schema version 1.0. + echo -n "1.0" > $out/VERSION + ''; + # Create a "layer" (set of files). mkPureLayer = { # Name of the layer @@ -413,6 +490,104 @@ rec { ''; }; + buildLayeredImage = { + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Docker config; e.g. what command to run on the container. + config ? {}, + # Time of creation of the image. 
Passing "now" will make the + # created date be the time of building. + created ? "1970-01-01T00:00:01Z", + # Docker's lowest maximum layer limit is 42-layers for an old + # version of the AUFS graph driver. We pick 24 to ensure there is + # plenty of room for extension. I believe the actual maximum is + # 128. + maxLayers ? 24 + }: + let + uid = 0; + gid = 0; + baseName = baseNameOf name; + contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); }; + + configJson = let + pure = writeText "${baseName}-config.json" (builtins.toJSON { + inherit created config; + architecture = "amd64"; + os = "linux"; + }); + impure = runCommand "${baseName}-standard-dynamic-date.json" + { buildInputs = [ jq ]; } + '' + jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out + ''; + in if created == "now" then impure else pure; + + bulkLayers = mkManyPureLayers { + name = baseName; + closure = writeText "closure" "${contentsEnv} ${configJson}"; + # One layer will be taken up by the customisationLayer, so + # take up one less. + maxLayers = maxLayers - 1; + inherit configJson; + }; + customisationLayer = mkCustomisationLayer { + name = baseName; + contents = contentsEnv; + baseJson = configJson; + inherit uid gid; + }; + result = runCommand "docker-image-${baseName}.tar.gz" { + buildInputs = [ jshon pigz coreutils findutils jq ]; + # Image name and tag must be lowercase + imageName = lib.toLower name; + imageTag = if tag == null then "" else lib.toLower tag; + baseJson = configJson; + } '' + ${lib.optionalString (tag == null) '' + outName="$(basename "$out")" + outHash=$(echo "$outName" | cut -d - -f 1) + + imageTag=$outHash + ''} + + find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list + echo ${customisationLayer} >> layer-list + + mkdir image + imageJson=$(cat ${configJson} | jq ". 
+ {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}") + manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]") + for layer in $(cat layer-list); do + layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1) + layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1) + ln -s "$layer" "./image/$layerID" + + manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .") + imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .") + imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .") + done + imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1) + echo "$imageJson" > "image/$imageJsonChecksum.json" + manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"") + echo "$manifestJson" > image/manifest.json + + jshon -n object \ + -n object -s "$layerID" -i "$imageTag" \ + -i "$imageName" > image/repositories + + echo "Cooking the image..." + tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out + + echo "Finished." + ''; + + in + result; + # 1. extract the base image # 2. create the layer # 3. add layer deps to the layer itself, diffing with the base image diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh new file mode 100755 index 000000000000..ff814c1f6130 --- /dev/null +++ b/pkgs/build-support/docker/store-path-to-layer.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -eu + +layerNumber=$1 +shift + +layerPath="./layers/$layerNumber" +echo "Creating layer #$layerNumber for $@" + +mkdir -p "$layerPath" +tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \ + --mtime="@$SOURCE_DATE_EPOCH" \ + --owner=0 --group=0 "$@" + +# Compute a checksum of the tarball. 
+tarhash=$(tarsum < $layerPath/layer.tar) + +# Add a 'checksum' field to the JSON, with the value set to the +# checksum of the tarball. +cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json + +# Indicate to docker that we're using schema version 1.0. +echo -n "1.0" > $layerPath/VERSION