diff --git a/doc/functions.xml b/doc/functions.xml
index 0c0d82b0342c..8223a8b0531c 100644
--- a/doc/functions.xml
+++ b/doc/functions.xml
@@ -682,6 +682,177 @@ hello latest de2bf4786de6 About a minute ago 25.2MB
+
+ buildLayeredImage
+
+
+ Create a Docker image with many of the store paths being on their own layer
+ to improve sharing between images.
+
+
+
+
+
+ name
+
+
+
+ The name of the resulting image.
+
+
+
+
+
+ tagoptional
+
+
+
+ Tag of the generated image.
+
+
+ Default: the output path's hash
+
+
+
+
+
+ contentsoptional
+
+
+
+ Top level paths in the container. Either a single derivation, or a list
+ of derivations.
+
+
+ Default:[]
+
+
+
+
+
+ configoptional
+
+
+
+ Run-time configuration of the container. A full list of the options is
+ available in the
+
+ Docker Image Specification v1.2.0 .
+
+
+ Default:{}
+
+
+
+
+
+ createdoptional
+
+
+
+ Date and time the layers were created. Follows the same
+ now exception supported by
+ buildImage.
+
+
+ Default:1970-01-01T00:00:01Z
+
+
+
+
+
+ maxLayersoptional
+
+
+
+ Maximum number of layers to create.
+
+
+ Default:24
+
+
+
+
+
+
+ Behavior of contents in the final image
+
+
+ Each path directly listed in contents will have a
+ symlink in the root of the image.
+
+
+
+ For example:
+
+ will create symlinks for all the paths in the hello
+ package:
+ /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello
+/share/info/hello.info -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/hello.info
+/share/locale/bg/LC_MESSAGES/hello.mo -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/hello.mo
+]]>
+
+
+
+
+ Automatic inclusion of config references
+
+
+ The closure of config is automatically included in the
+ closure of the final image.
+
+
+
+ This allows you to make very simple Docker images with very little code.
+ This container will start up and run hello:
+
+
+
+
+
+ Adjusting maxLayers
+
+
+ Increasing the maxLayers increases the number of layers
+ which have a chance to be shared between different images.
+
+
+
+ Modern Docker installations support up to 128 layers; however, older
+ versions support as few as 42.
+
+
+
+ If the produced image will not be extended by other Docker builds, it is
+ safe to set maxLayers to 128.
+ However it will be impossible to extend the image further.
+
+
+
+ The first (maxLayers-2) most "popular" paths will have
+ their own individual layers, then layer #maxLayers-1
+ will contain all the remaining "unpopular" paths, and finally layer
+ #maxLayers will contain the Image configuration.
+
+
+
+ Docker's Layers are not inherently ordered, they are content-addressable
+ and are not explicitly layered until they are composed into an Image.
+
+
+
+
pullImage
diff --git a/nixos/tests/docker-tools.nix b/nixos/tests/docker-tools.nix
index 5a7590cbf364..360b32faae72 100644
--- a/nixos/tests/docker-tools.nix
+++ b/nixos/tests/docker-tools.nix
@@ -58,5 +58,9 @@ import ./make-test.nix ({ pkgs, ... }: {
# Ensure Docker images can use an unstable date
$docker->succeed("docker load --input='${pkgs.dockerTools.examples.bash}'");
$docker->succeed("[ '1970-01-01T00:00:01Z' != \"\$(docker inspect ${pkgs.dockerTools.examples.unstableDate.imageName} | ${pkgs.jq}/bin/jq -r .[].Created)\" ]");
+
+ # Ensure Layered Docker images work
+ $docker->succeed("docker load --input='${pkgs.dockerTools.examples.layered-image}'");
+ $docker->succeed("docker run --rm ${pkgs.dockerTools.examples.layered-image.imageName}");
'';
})
diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix
index 0cee1dd2916f..73639a521b6a 100644
--- a/pkgs/build-support/docker/default.nix
+++ b/pkgs/build-support/docker/default.nix
@@ -1,4 +1,5 @@
{
+ symlinkJoin,
coreutils,
docker,
e2fsprogs,
@@ -19,6 +20,7 @@
utillinux,
vmTools,
writeReferencesToFile,
+ referencesByPopularity,
writeScript,
writeText,
}:
@@ -77,7 +79,9 @@ rec {
ln -sT ${docker.src}/components/engine/pkg/tarsum src/github.com/docker/docker/pkg/tarsum
go build
- cp tarsum $out
+ mkdir -p $out/bin
+
+ cp tarsum $out/bin/
'';
# buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM
@@ -270,6 +274,81 @@ rec {
perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths
'';
+ # Create $maxLayers worth of Docker Layers, one layer per store path
+ # unless there are more paths than $maxLayers. In that case, create
+ # $maxLayers-1 for the most popular layers, and smush the remaining
+ # store paths into one final layer.
+ mkManyPureLayers = {
+ name,
+ # Files to add to the layer.
+ closure,
+ configJson,
+ # Docker has a 42-layer maximum; we pick 24 to ensure there is plenty
+ # of room for extension
+ maxLayers ? 24
+ }:
+ runCommand "${name}-granular-docker-layers" {
+ inherit maxLayers;
+ paths = referencesByPopularity closure;
+ buildInputs = [ jshon rsync tarsum ];
+ enableParallelBuilding = true;
+ }
+ ''
+ # Delete impurities for store path layers, so they don't get
+ # shared and taint other projects.
+ cat ${configJson} \
+ | jshon -d config \
+ | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
+
+ # WARNING!
+ # The following code is fiddly w.r.t. ensuring every layer is
+ # created, and that no paths are missed. If you change the
+ # following head and tail call lines, double-check that your
+ # code behaves properly when the number of layers equals:
+ # maxLayers-1, maxLayers, and maxLayers+1
+ head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh}
+ if [ $(cat $paths | wc -l) -ge $maxLayers ]; then
+ tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers
+ fi
+
+ echo "Finished building layer '$name'"
+
+ mv ./layers $out
+ '';
+
+ # Create a "Customisation" layer which adds symlinks at the root of
+ # the image to the root paths of the closure. Also add the config
+ # data like what command to run and the environment to run it in.
+ mkCustomisationLayer = {
+ name,
+ # Files to add to the layer.
+ contents,
+ baseJson,
+ uid ? 0, gid ? 0,
+ }:
+ runCommand "${name}-customisation-layer" {
+ buildInputs = [ jshon rsync tarsum ];
+ }
+ ''
+ cp -r ${contents}/ ./layer
+
+ # Tar up the layer and throw it into 'layer.tar'.
+ echo "Packing layer..."
+ mkdir $out
+ tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar .
+
+ # Compute a checksum of the tarball.
+ echo "Computing layer checksum..."
+ tarhash=$(tarsum < $out/layer.tar)
+
+ # Add a 'checksum' field to the JSON, with the value set to the
+ # checksum of the tarball.
+ cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
+
+ # Indicate to docker that we're using schema version 1.0.
+ echo -n "1.0" > $out/VERSION
+ '';
+
# Create a "layer" (set of files).
mkPureLayer = {
# Name of the layer
@@ -287,7 +366,7 @@ rec {
}:
runCommand "docker-layer-${name}" {
inherit baseJson contents extraCommands;
- buildInputs = [ jshon rsync ];
+ buildInputs = [ jshon rsync tarsum ];
}
''
mkdir layer
@@ -314,11 +393,11 @@ rec {
# Compute a checksum of the tarball.
echo "Computing layer checksum..."
- tarsum=$(${tarsum} < $out/layer.tar)
+ tarhash=$(tarsum < $out/layer.tar)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
- cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json
+ cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@@ -402,8 +481,8 @@ rec {
# Compute the tar checksum and add it to the output json.
echo "Computing checksum..."
- ts=$(${tarsum} < $out/layer.tar)
- cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json
+ tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar)
+ cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@@ -411,6 +490,104 @@ rec {
'';
};
+ buildLayeredImage = {
+ # Image Name
+ name,
+ # Image tag, the Nix's output hash will be used if null
+ tag ? null,
+ # Files to put on the image (a nix store path or list of paths).
+ contents ? [],
+ # Docker config; e.g. what command to run on the container.
+ config ? {},
+ # Time of creation of the image. Passing "now" will make the
+ # created date be the time of building.
+ created ? "1970-01-01T00:00:01Z",
+ # Docker's lowest maximum layer limit is 42-layers for an old
+ # version of the AUFS graph driver. We pick 24 to ensure there is
+ # plenty of room for extension. I believe the actual maximum is
+ # 128.
+ maxLayers ? 24
+ }:
+ let
+ uid = 0;
+ gid = 0;
+ baseName = baseNameOf name;
+ contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); };
+
+ configJson = let
+ pure = writeText "${baseName}-config.json" (builtins.toJSON {
+ inherit created config;
+ architecture = "amd64";
+ os = "linux";
+ });
+ impure = runCommand "${baseName}-standard-dynamic-date.json"
+ { buildInputs = [ jq ]; }
+ ''
+ jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
+ '';
+ in if created == "now" then impure else pure;
+
+ bulkLayers = mkManyPureLayers {
+ name = baseName;
+ closure = writeText "closure" "${contentsEnv} ${configJson}";
+ # One layer will be taken up by the customisationLayer, so
+ # take up one less.
+ maxLayers = maxLayers - 1;
+ inherit configJson;
+ };
+ customisationLayer = mkCustomisationLayer {
+ name = baseName;
+ contents = contentsEnv;
+ baseJson = configJson;
+ inherit uid gid;
+ };
+ result = runCommand "docker-image-${baseName}.tar.gz" {
+ buildInputs = [ jshon pigz coreutils findutils jq ];
+ # Image name and tag must be lowercase
+ imageName = lib.toLower name;
+ imageTag = if tag == null then "" else lib.toLower tag;
+ baseJson = configJson;
+ } ''
+ ${lib.optionalString (tag == null) ''
+ outName="$(basename "$out")"
+ outHash=$(echo "$outName" | cut -d - -f 1)
+
+ imageTag=$outHash
+ ''}
+
+ find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
+ echo ${customisationLayer} >> layer-list
+
+ mkdir image
+ imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
+ manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
+ for layer in $(cat layer-list); do
+ layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
+ layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
+ ln -s "$layer" "./image/$layerID"
+
+ manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .")
+ imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .")
+ imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .")
+ done
+ imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
+ echo "$imageJson" > "image/$imageJsonChecksum.json"
+ manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
+ echo "$manifestJson" > image/manifest.json
+
+ jshon -n object \
+ -n object -s "$layerID" -i "$imageTag" \
+ -i "$imageName" > image/repositories
+
+ echo "Cooking the image..."
+ tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out
+
+ echo "Finished."
+ '';
+
+ in
+ result;
+
# 1. extract the base image
# 2. create the layer
# 3. add layer deps to the layer itself, diffing with the base image
diff --git a/pkgs/build-support/docker/examples.nix b/pkgs/build-support/docker/examples.nix
index 822e0dbb31f2..003e7429a81b 100644
--- a/pkgs/build-support/docker/examples.nix
+++ b/pkgs/build-support/docker/examples.nix
@@ -150,4 +150,11 @@ rec {
contents = [ pkgs.coreutils ];
created = "now";
};
+
+ # 10. Create a layered image
+ layered-image = pkgs.dockerTools.buildLayeredImage {
+ name = "layered-image";
+ tag = "latest";
+ config.Cmd = [ "${pkgs.hello}/bin/hello" ];
+ };
}
diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh
new file mode 100755
index 000000000000..ff814c1f6130
--- /dev/null
+++ b/pkgs/build-support/docker/store-path-to-layer.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -eu
+
+layerNumber=$1
+shift
+
+layerPath="./layers/$layerNumber"
+echo "Creating layer #$layerNumber for $@"
+
+mkdir -p "$layerPath"
+tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \
+ --mtime="@$SOURCE_DATE_EPOCH" \
+ --owner=0 --group=0 "$@"
+
+# Compute a checksum of the tarball.
+tarhash=$(tarsum < $layerPath/layer.tar)
+
+# Add a 'checksum' field to the JSON, with the value set to the
+# checksum of the tarball.
+cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json
+
+# Indicate to docker that we're using schema version 1.0.
+echo -n "1.0" > $layerPath/VERSION
diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py
new file mode 100644
index 000000000000..d67a5dfcf140
--- /dev/null
+++ b/pkgs/build-support/references-by-popularity/closure-graph.py
@@ -0,0 +1,520 @@
+# IMPORTANT: Making changes?
+#
+# Validate your changes with python3 ./closure-graph.py --test
+
+
+# Using a simple algorithm, convert the references to a path into a
+# sorted list of dependent paths based on how often they're referenced
+# and how deep in the tree they live. Equally-"popular" paths are then
+# sorted by name.
+#
+# The existing writeReferencesToFile prints the paths in a simple
+# ascii-based sorting of the paths.
+#
+# Sorting the paths by graph improves the chances that the difference
+# between two builds appear near the end of the list, instead of near
+# the beginning. This makes a difference for Nix builds which export a
+# closure for another program to consume, if that program implements its
+# own level of binary diffing.
+#
+# For an example, Docker Images. If each store path is a separate layer
+# then Docker Images can be very efficiently transferred between systems,
+# and we get very good cache reuse between images built with the same
+# version of Nixpkgs. However, since Docker only reliably supports a
+# small number of layers (42) it is important to pick the individual
+# layers carefully. By storing very popular store paths in the first 40
+# layers, we improve the chances that the next Docker image will share
+# many of those layers.*
+#
+# Given the dependency tree:
+#
+# A - B - C - D -\
+# \ \ \ \
+# \ \ \ \
+# \ \ - E ---- F
+# \- G
+#
+# Nodes which have multiple references are duplicated:
+#
+# A - B - C - D - F
+# \ \ \
+# \ \ \- E - F
+# \ \
+# \ \- E - F
+# \
+# \- G
+#
+# Each leaf node is now replaced by a counter defaulted to 1:
+#
+# A - B - C - D - (F:1)
+# \ \ \
+# \ \ \- E - (F:1)
+# \ \
+# \ \- E - (F:1)
+# \
+# \- (G:1)
+#
+# Then each leaf counter is merged with its parent node, replacing the
+# parent node with a counter of 1, and each existing counter being
+# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
+#
+# A - B - C - (D:1, F:2)
+# \ \ \
+# \ \ \- (E:1, F:2)
+# \ \
+# \ \- (E:1, F:2)
+# \
+# \- (G:1)
+#
+# Then each leaf counter is merged with its parent node again, merging
+# any counters, then incrementing each:
+#
+# A - B - (C:1, D:2, E:2, F:5)
+# \ \
+# \ \- (E:1, F:2)
+# \
+# \- (G:1)
+#
+# And again:
+#
+# A - (B:1, C:2, D:3, E:4, F:8)
+# \
+# \- (G:1)
+#
+# And again:
+#
+# (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
+#
+# and then paths have the following "popularity":
+#
+# A 1
+# B 2
+# C 3
+# D 4
+# E 5
+# F 9
+# G 2
+#
+# and the popularity contest would result in the paths being printed as:
+#
+# F
+# E
+# D
+# C
+# B
+# G
+# A
+#
+# * Note: People who have used a Dockerfile before assume Docker's
+# Layers are inherently ordered. However, this is not true -- Docker
+# layers are content-addressable and are not explicitly layered until
+# they are composed into an Image.
+
+import sys
+import json
+import unittest
+
+from pprint import pprint
+from collections import defaultdict
+
+# Find paths in the original dataset which are never referenced by
+# any other paths
+def find_roots(closures):
+ roots = [];
+
+ for closure in closures:
+ path = closure['path']
+ if not any_refer_to(path, closures):
+ roots.append(path)
+
+ return roots
+
+class TestFindRoots(unittest.TestCase):
+ def test_find_roots(self):
+ self.assertCountEqual(
+ find_roots([
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/foo",
+ "/nix/store/bar"
+ ]
+ },
+ {
+ "path": "/nix/store/bar",
+ "references": [
+ "/nix/store/bar",
+ "/nix/store/tux"
+ ]
+ },
+ {
+ "path": "/nix/store/hello",
+ "references": [
+ ]
+ }
+ ]),
+ ["/nix/store/foo", "/nix/store/hello"]
+ )
+
+
+def any_refer_to(path, closures):
+ for closure in closures:
+ if path != closure['path']:
+ if path in closure['references']:
+ return True
+ return False
+
+class TestAnyReferTo(unittest.TestCase):
+ def test_has_references(self):
+ self.assertTrue(
+ any_refer_to(
+ "/nix/store/bar",
+ [
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/bar"
+ ]
+ },
+ ]
+ ),
+ )
+ def test_no_references(self):
+ self.assertFalse(
+ any_refer_to(
+ "/nix/store/foo",
+ [
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/foo",
+ "/nix/store/bar"
+ ]
+ },
+ ]
+ ),
+ )
+
+def all_paths(closures):
+ paths = []
+ for closure in closures:
+ paths.append(closure['path'])
+ paths.extend(closure['references'])
+ paths.sort()
+ return list(set(paths))
+
+
+class TestAllPaths(unittest.TestCase):
+ def test_returns_all_paths(self):
+ self.assertCountEqual(
+ all_paths([
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/foo",
+ "/nix/store/bar"
+ ]
+ },
+ {
+ "path": "/nix/store/bar",
+ "references": [
+ "/nix/store/bar",
+ "/nix/store/tux"
+ ]
+ },
+ {
+ "path": "/nix/store/hello",
+ "references": [
+ ]
+ }
+ ]),
+ ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]
+ )
+ def test_no_references(self):
+ self.assertFalse(
+ any_refer_to(
+ "/nix/store/foo",
+ [
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/foo",
+ "/nix/store/bar"
+ ]
+ },
+ ]
+ ),
+ )
+
+# Convert:
+#
+# [
+# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
+# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
+# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
+# { path: /nix/store/tux, references: [ /nix/store/tux ] }
+# ]
+#
+# To:
+# {
+# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
+# /nix/store/bar: [ /nix/store/baz ],
+# /nix/store/baz: [ /nix/store/tux ] },
+# /nix/store/tux: [ ]
+# }
+#
+# Note that it drops self-references to avoid loops.
+def make_lookup(closures):
+ lookup = {}
+
+ for closure in closures:
+ # paths often self-refer
+ nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']]
+ lookup[closure['path']] = nonreferential_paths
+
+ return lookup
+
+class TestMakeLookup(unittest.TestCase):
+ def test_returns_lookp(self):
+ self.assertDictEqual(
+ make_lookup([
+ {
+ "path": "/nix/store/foo",
+ "references": [
+ "/nix/store/foo",
+ "/nix/store/bar"
+ ]
+ },
+ {
+ "path": "/nix/store/bar",
+ "references": [
+ "/nix/store/bar",
+ "/nix/store/tux"
+ ]
+ },
+ {
+ "path": "/nix/store/hello",
+ "references": [
+ ]
+ }
+ ]),
+ {
+ "/nix/store/foo": [ "/nix/store/bar" ],
+ "/nix/store/bar": [ "/nix/store/tux" ],
+ "/nix/store/hello": [ ],
+ }
+ )
+
+# Convert:
+#
+# /nix/store/foo with
+# {
+# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
+# /nix/store/bar: [ /nix/store/baz ],
+# /nix/store/baz: [ /nix/store/tux ] },
+# /nix/store/tux: [ ]
+# }
+#
+# To:
+#
+# {
+# /nix/store/bar: {
+# /nix/store/baz: {
+# /nix/store/tux: {}
+# }
+# },
+# /nix/store/baz: {
+# /nix/store/tux: {}
+# }
+# }
+def make_graph_segment_from_root(root, lookup):
+ children = {}
+ for ref in lookup[root]:
+ children[ref] = make_graph_segment_from_root(ref, lookup)
+ return children
+
+class TestMakeGraphSegmentFromRoot(unittest.TestCase):
+ def test_returns_graph(self):
+ self.assertDictEqual(
+ make_graph_segment_from_root("/nix/store/foo", {
+ "/nix/store/foo": [ "/nix/store/bar" ],
+ "/nix/store/bar": [ "/nix/store/tux" ],
+ "/nix/store/tux": [ ],
+ "/nix/store/hello": [ ],
+ }),
+ {
+ "/nix/store/bar": {
+ "/nix/store/tux": {}
+ }
+ }
+ )
+ def test_returns_graph_tiny(self):
+ self.assertDictEqual(
+ make_graph_segment_from_root("/nix/store/tux", {
+ "/nix/store/foo": [ "/nix/store/bar" ],
+ "/nix/store/bar": [ "/nix/store/tux" ],
+ "/nix/store/tux": [ ],
+ }),
+ {}
+ )
+
+# Convert a graph segment in to a popularity-counted dictionary:
+#
+# From:
+# {
+# /nix/store/foo: {
+# /nix/store/bar: {
+# /nix/store/baz: {
+# /nix/store/tux: {}
+# }
+# }
+# /nix/store/baz: {
+# /nix/store/tux: {}
+# }
+# }
+# }
+#
+# to:
+# [
+# /nix/store/foo: 1
+# /nix/store/bar: 2
+# /nix/store/baz: 4
+# /nix/store/tux: 6
+# ]
+def graph_popularity_contest(full_graph):
+ popularity = defaultdict(int)
+ for path, subgraph in full_graph.items():
+ popularity[path] += 1
+ subcontest = graph_popularity_contest(subgraph)
+ for subpath, subpopularity in subcontest.items():
+ popularity[subpath] += subpopularity + 1
+
+ return popularity
+
+class TestGraphPopularityContest(unittest.TestCase):
+ def test_counts_popularity(self):
+ self.assertDictEqual(
+ graph_popularity_contest({
+ "/nix/store/foo": {
+ "/nix/store/bar": {
+ "/nix/store/baz": {
+ "/nix/store/tux": {}
+ }
+ },
+ "/nix/store/baz": {
+ "/nix/store/tux": {}
+ }
+ }
+ }),
+ {
+ "/nix/store/foo": 1,
+ "/nix/store/bar": 2,
+ "/nix/store/baz": 4,
+ "/nix/store/tux": 6,
+ }
+ )
+
+# Emit a list of packages by popularity, most first:
+#
+# From:
+# [
+# /nix/store/foo: 1
+# /nix/store/bar: 1
+# /nix/store/baz: 2
+# /nix/store/tux: 2
+# ]
+#
+# To:
+# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
+def order_by_popularity(paths):
+ paths_by_popularity = defaultdict(list)
+ popularities = []
+ for path, popularity in paths.items():
+ popularities.append(popularity)
+ paths_by_popularity[popularity].append(path)
+
+ popularities = list(set(popularities))
+ popularities.sort()
+
+ flat_ordered = []
+ for popularity in popularities:
+ paths = paths_by_popularity[popularity]
+ paths.sort(key=package_name)
+
+ flat_ordered.extend(reversed(paths))
+ return list(reversed(flat_ordered))
+
+
+class TestOrderByPopularity(unittest.TestCase):
+ def test_returns_in_order(self):
+ self.assertEqual(
+ order_by_popularity({
+ "/nix/store/foo": 1,
+ "/nix/store/bar": 1,
+ "/nix/store/baz": 2,
+ "/nix/store/tux": 2,
+ }),
+ [
+ "/nix/store/baz",
+ "/nix/store/tux",
+ "/nix/store/bar",
+ "/nix/store/foo"
+ ]
+ )
+
+def package_name(path):
+ parts = path.split('-')
+ start = parts.pop(0)
+ # don't throw away any data, so the order is always the same.
+ # even in cases where only the hash at the start has changed.
+ parts.append(start)
+ return '-'.join(parts)
+
+def main():
+ filename = sys.argv[1]
+ key = sys.argv[2]
+
+ with open(filename) as f:
+ data = json.load(f)
+
+ # Data comes in as:
+ # [
+ # { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
+ # { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
+ # { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
+ # { path: /nix/store/tux, references: [ /nix/store/tux ] }
+ # ]
+ #
+ # and we want to get out a list of paths ordered by how universally
+ # important they are, i.e. tux is referenced by every path, transitively
+ # so it should be #1
+ #
+ # [
+ # /nix/store/tux,
+ # /nix/store/baz,
+ # /nix/store/bar,
+ # /nix/store/foo,
+ # ]
+ graph = data[key]
+
+ roots = find_roots(graph);
+ lookup = make_lookup(graph)
+
+ full_graph = {}
+ for root in roots:
+ full_graph[root] = make_graph_segment_from_root(root, lookup)
+
+ ordered = order_by_popularity(graph_popularity_contest(full_graph))
+ missing = []
+ for path in all_paths(graph):
+ if path not in ordered:
+ missing.append(path)
+
+ ordered.extend(missing)
+ print("\n".join(ordered))
+
+if "--test" in sys.argv:
+ # Don't pass --test otherwise unittest gets mad
+ unittest.main(argv = [f for f in sys.argv if f != "--test" ])
+else:
+ main()
diff --git a/pkgs/build-support/references-by-popularity/default.nix b/pkgs/build-support/references-by-popularity/default.nix
new file mode 100644
index 000000000000..4cae2dcf3ca9
--- /dev/null
+++ b/pkgs/build-support/references-by-popularity/default.nix
@@ -0,0 +1,15 @@
+{ runCommand, python3, coreutils }:
+# Write the references of `path' to a file, in order of how "popular" each
+# reference is. Nix 2 only.
+path: runCommand "closure-paths"
+{
+ exportReferencesGraph.graph = path;
+ __structuredAttrs = true;
+ PATH = "${coreutils}/bin:${python3}/bin";
+ builder = builtins.toFile "builder"
+ ''
+ . .attrs.sh
+ python3 ${./closure-graph.py} .attrs.json graph > ''${outputs[out]}
+ '';
+ }
+ ""
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index c8d15dbd4063..33a2b6455a23 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -365,6 +365,8 @@ with pkgs;
nukeReferences = callPackage ../build-support/nuke-references { };
+ referencesByPopularity = callPackage ../build-support/references-by-popularity { };
+
removeReferencesTo = callPackage ../build-support/remove-references-to { };
vmTools = callPackage ../build-support/vm { };