diff --git a/doc/functions.xml b/doc/functions.xml index 0c0d82b0342c..8223a8b0531c 100644 --- a/doc/functions.xml +++ b/doc/functions.xml @@ -682,6 +682,177 @@ hello latest de2bf4786de6 About a minute ago 25.2MB +
+ buildLayeredImage + + + Create a Docker image with many of the store paths being on their own layer + to improve sharing between images. + + + + + + name + + + + The name of the resulting image. + + + + + + tag optional + + + + Tag of the generated image. + + + Default: the output path's hash + + + + + + contents optional + + + + Top level paths in the container. Either a single derivation, or a list + of derivations. + + + Default: [] + + + + + + config optional + + + + Run-time configuration of the container. A full list of the options are + available at in the + + Docker Image Specification v1.2.0 . + + + Default: {} + + + + + + created optional + + + + Date and time the layers were created. Follows the same + now exception supported by + buildImage. + + + Default: 1970-01-01T00:00:01Z + + + + + + maxLayers optional + + + + Maximum number of layers to create. + + + Default: 24 + + + + + +
+ Behavior of <varname>contents</varname> in the final image + + + Each path directly listed in contents will have a + symlink in the root of the image. + + + + For example: + + will create symlinks for all the paths in the hello + package: + /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello +/share/info/hello.info -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/hello.info +/share/locale/bg/LC_MESSAGES/hello.mo -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/hello.mo +]]> + +
+ +
+ Automatic inclusion of <varname>config</varname> references + + + The closure of config is automatically included in the + closure of the final image. + + + + This allows you to make very simple Docker images with very little code. + This container will start up and run hello: + + +
+ +
+ Adjusting <varname>maxLayers</varname> + + + Increasing the maxLayers increases the number of layers + which have a chance to be shared between different images. + + + + Modern Docker installations support up to 128 layers, however older + versions support as few as 42. + + + + If the produced image will not be extended by other Docker builds, it is + safe to set maxLayers to 128. + However it will be impossible to extend the image further. + + + + The first (maxLayers-2) most "popular" paths will have + their own individual layers, then layer #maxLayers-1 + will contain all the remaining "unpopular" paths, and finally layer + #maxLayers will contain the Image configuration. + + + + Docker's Layers are not inherently ordered, they are content-addressable + and are not explicitly layered until they are composed in to an Image. + +
+
+
pullImage diff --git a/nixos/tests/docker-tools.nix b/nixos/tests/docker-tools.nix index 5a7590cbf364..360b32faae72 100644 --- a/nixos/tests/docker-tools.nix +++ b/nixos/tests/docker-tools.nix @@ -58,5 +58,9 @@ import ./make-test.nix ({ pkgs, ... }: { # Ensure Docker images can use an unstable date $docker->succeed("docker load --input='${pkgs.dockerTools.examples.bash}'"); $docker->succeed("[ '1970-01-01T00:00:01Z' != \"\$(docker inspect ${pkgs.dockerTools.examples.unstableDate.imageName} | ${pkgs.jq}/bin/jq -r .[].Created)\" ]"); + + # Ensure Layered Docker images work + $docker->succeed("docker load --input='${pkgs.dockerTools.examples.layered-image}'"); + $docker->succeed("docker run --rm ${pkgs.dockerTools.examples.layered-image.imageName}"); ''; }) diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 0cee1dd2916f..73639a521b6a 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -1,4 +1,5 @@ { + symlinkJoin, coreutils, docker, e2fsprogs, @@ -19,6 +20,7 @@ utillinux, vmTools, writeReferencesToFile, + referencesByPopularity, writeScript, writeText, }: @@ -77,7 +79,9 @@ rec { ln -sT ${docker.src}/components/engine/pkg/tarsum src/github.com/docker/docker/pkg/tarsum go build - cp tarsum $out + mkdir -p $out/bin + + cp tarsum $out/bin/ ''; # buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM @@ -270,6 +274,81 @@ rec { perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths ''; + # Create $maxLayers worth of Docker Layers, one layer per store path + # unless there are more paths than $maxLayers. In that case, create + # $maxLayers-1 for the most popular layers, and smush the remainaing + # store paths in to one final layer. + mkManyPureLayers = { + name, + # Files to add to the layer. + closure, + configJson, + # Docker has a 42-layer maximum, we pick 24 to ensure there is plenty + # of room for extension + maxLayers ? 24 + }: + runCommand "${name}-granular-docker-layers" { + inherit maxLayers; + paths = referencesByPopularity closure; + buildInputs = [ jshon rsync tarsum ]; + enableParallelBuilding = true; + } + '' + # Delete impurities for store path layers, so they don't get + # shared and taint other projects. + cat ${configJson} \ + | jshon -d config \ + | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json + + # WARNING! + # The following code is fiddly w.r.t. ensuring every layer is + # created, and that no paths are missed. If you change the + # following head and tail call lines, double-check that your + # code behaves properly when the number of layers equals: + # maxLayers-1, maxLayers, and maxLayers+1 + head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh} + if [ $(cat $paths | wc -l) -ge $maxLayers ]; then + tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers + fi + + echo "Finished building layer '$name'" + + mv ./layers $out + ''; + + # Create a "Customisation" layer which adds symlinks at the root of + # the image to the root paths of the closure. Also add the config + # data like what command to run and the environment to run it in. + mkCustomisationLayer = { + name, + # Files to add to the layer. + contents, + baseJson, + uid ? 0, gid ? 0, + }: + runCommand "${name}-customisation-layer" { + buildInputs = [ jshon rsync tarsum ]; + } + '' + cp -r ${contents}/ ./layer + + # Tar up the layer and throw it into 'layer.tar'. + echo "Packing layer..." + mkdir $out + tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar . + + # Compute a checksum of the tarball. + echo "Computing layer checksum..." + tarhash=$(tarsum < $out/layer.tar) + + # Add a 'checksum' field to the JSON, with the value set to the + # checksum of the tarball. + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json + + # Indicate to docker that we're using schema version 1.0. + echo -n "1.0" > $out/VERSION + ''; + # Create a "layer" (set of files). mkPureLayer = { # Name of the layer @@ -287,7 +366,7 @@ rec { }: runCommand "docker-layer-${name}" { inherit baseJson contents extraCommands; - buildInputs = [ jshon rsync ]; + buildInputs = [ jshon rsync tarsum ]; } '' mkdir layer @@ -314,11 +393,11 @@ rec { # Compute a checksum of the tarball. echo "Computing layer checksum..." - tarsum=$(${tarsum} < $out/layer.tar) + tarhash=$(tarsum < $out/layer.tar) # Add a 'checksum' field to the JSON, with the value set to the # checksum of the tarball. - cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json # Indicate to docker that we're using schema version 1.0. echo -n "1.0" > $out/VERSION @@ -402,8 +481,8 @@ rec { # Compute the tar checksum and add it to the output json. echo "Computing checksum..." - ts=$(${tarsum} < $out/layer.tar) - cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json + tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar) + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json # Indicate to docker that we're using schema version 1.0. echo -n "1.0" > $out/VERSION @@ -411,6 +490,104 @@ rec { ''; }; + buildLayeredImage = { + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Docker config; e.g. what command to run on the container. + config ? {}, + # Time of creation of the image. Passing "now" will make the + # created date be the time of building. + created ? "1970-01-01T00:00:01Z", + # Docker's lowest maximum layer limit is 42-layers for an old + # version of the AUFS graph driver. We pick 24 to ensure there is + # plenty of room for extension. I believe the actual maximum is + # 128. + maxLayers ? 24 + }: + let + uid = 0; + gid = 0; + baseName = baseNameOf name; + contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); }; + + configJson = let + pure = writeText "${baseName}-config.json" (builtins.toJSON { + inherit created config; + architecture = "amd64"; + os = "linux"; + }); + impure = runCommand "${baseName}-standard-dynamic-date.json" + { buildInputs = [ jq ]; } + '' + jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out + ''; + in if created == "now" then impure else pure; + + bulkLayers = mkManyPureLayers { + name = baseName; + closure = writeText "closure" "${contentsEnv} ${configJson}"; + # One layer will be taken up by the customisationLayer, so + # take up one less. + maxLayers = maxLayers - 1; + inherit configJson; + }; + customisationLayer = mkCustomisationLayer { + name = baseName; + contents = contentsEnv; + baseJson = configJson; + inherit uid gid; + }; + result = runCommand "docker-image-${baseName}.tar.gz" { + buildInputs = [ jshon pigz coreutils findutils jq ]; + # Image name and tag must be lowercase + imageName = lib.toLower name; + imageTag = if tag == null then "" else lib.toLower tag; + baseJson = configJson; + } '' + ${lib.optionalString (tag == null) '' + outName="$(basename "$out")" + outHash=$(echo "$outName" | cut -d - -f 1) + + imageTag=$outHash + ''} + + find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list + echo ${customisationLayer} >> layer-list + + mkdir image + imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}") + manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]") + for layer in $(cat layer-list); do + layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1) + layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1) + ln -s "$layer" "./image/$layerID" + + manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .") + imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .") + imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .") + done + imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1) + echo "$imageJson" > "image/$imageJsonChecksum.json" + manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"") + echo "$manifestJson" > image/manifest.json + + jshon -n object \ + -n object -s "$layerID" -i "$imageTag" \ + -i "$imageName" > image/repositories + + echo "Cooking the image..." + tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out + + echo "Finished." + ''; + + in + result; + # 1. extract the base image # 2. create the layer # 3. add layer deps to the layer itself, diffing with the base image diff --git a/pkgs/build-support/docker/examples.nix b/pkgs/build-support/docker/examples.nix index 822e0dbb31f2..003e7429a81b 100644 --- a/pkgs/build-support/docker/examples.nix +++ b/pkgs/build-support/docker/examples.nix @@ -150,4 +150,11 @@ rec { contents = [ pkgs.coreutils ]; created = "now"; }; + + # 10. Create a layered image + layered-image = pkgs.dockerTools.buildLayeredImage { + name = "layered-image"; + tag = "latest"; + config.Cmd = [ "${pkgs.hello}/bin/hello" ]; + }; } diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh new file mode 100755 index 000000000000..ff814c1f6130 --- /dev/null +++ b/pkgs/build-support/docker/store-path-to-layer.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -eu + +layerNumber=$1 +shift + +layerPath="./layers/$layerNumber" +echo "Creating layer #$layerNumber for $@" + +mkdir -p "$layerPath" +tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \ + --mtime="@$SOURCE_DATE_EPOCH" \ + --owner=0 --group=0 "$@" + +# Compute a checksum of the tarball. +tarhash=$(tarsum < $layerPath/layer.tar) + +# Add a 'checksum' field to the JSON, with the value set to the +# checksum of the tarball. +cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json + +# Indicate to docker that we're using schema version 1.0. +echo -n "1.0" > $layerPath/VERSION diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py new file mode 100644 index 000000000000..d67a5dfcf140 --- /dev/null +++ b/pkgs/build-support/references-by-popularity/closure-graph.py @@ -0,0 +1,520 @@ +# IMPORTANT: Making changes? +# +# Validate your changes with python3 ./closure-graph.py --test + + +# Using a simple algorithm, convert the references to a path in to a +# sorted list of dependent paths based on how often they're referenced +# and how deep in the tree they live. Equally-"popular" paths are then +# sorted by name. +# +# The existing writeReferencesToFile prints the paths in a simple +# ascii-based sorting of the paths. +# +# Sorting the paths by graph improves the chances that the difference +# between two builds appear near the end of the list, instead of near +# the beginning. This makes a difference for Nix builds which export a +# closure for another program to consume, if that program implements its +# own level of binary diffing. +# +# For an example, Docker Images. If each store path is a separate layer +# then Docker Images can be very efficiently transfered between systems, +# and we get very good cache reuse between images built with the same +# version of Nixpkgs. However, since Docker only reliably supports a +# small number of layers (42) it is important to pick the individual +# layers carefully. By storing very popular store paths in the first 40 +# layers, we improve the chances that the next Docker image will share +# many of those layers.* +# +# Given the dependency tree: +# +# A - B - C - D -\ +# \ \ \ \ +# \ \ \ \ +# \ \ - E ---- F +# \- G +# +# Nodes which have multiple references are duplicated: +# +# A - B - C - D - F +# \ \ \ +# \ \ \- E - F +# \ \ +# \ \- E - F +# \ +# \- G +# +# Each leaf node is now replaced by a counter defaulted to 1: +# +# A - B - C - D - (F:1) +# \ \ \ +# \ \ \- E - (F:1) +# \ \ +# \ \- E - (F:1) +# \ +# \- (G:1) +# +# Then each leaf counter is merged with its parent node, replacing the +# parent node with a counter of 1, and each existing counter being +# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`: +# +# A - B - C - (D:1, F:2) +# \ \ \ +# \ \ \- (E:1, F:2) +# \ \ +# \ \- (E:1, F:2) +# \ +# \- (G:1) +# +# Then each leaf counter is merged with its parent node again, merging +# any counters, then incrementing each: +# +# A - B - (C:1, D:2, E:2, F:5) +# \ \ +# \ \- (E:1, F:2) +# \ +# \- (G:1) +# +# And again: +# +# A - (B:1, C:2, D:3, E:4, F:8) +# \ +# \- (G:1) +# +# And again: +# +# (A:1, B:2, C:3, D:4, E:5, F:9, G:2) +# +# and then paths have the following "popularity": +# +# A 1 +# B 2 +# C 3 +# D 4 +# E 5 +# F 9 +# G 2 +# +# and the popularity contest would result in the paths being printed as: +# +# F +# E +# D +# C +# B +# G +# A +# +# * Note: People who have used a Dockerfile before assume Docker's +# Layers are inherently ordered. However, this is not true -- Docker +# layers are content-addressable and are not explicitly layered until +# they are composed in to an Image. + +import sys +import json +import unittest + +from pprint import pprint +from collections import defaultdict + +# Find paths in the original dataset which are never referenced by +# any other paths +def find_roots(closures): + roots = []; + + for closure in closures: + path = closure['path'] + if not any_refer_to(path, closures): + roots.append(path) + + return roots + +class TestFindRoots(unittest.TestCase): + def test_find_roots(self): + self.assertCountEqual( + find_roots([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + ["/nix/store/foo", "/nix/store/hello"] + ) + + +def any_refer_to(path, closures): + for closure in closures: + if path != closure['path']: + if path in closure['references']: + return True + return False + +class TestAnyReferTo(unittest.TestCase): + def test_has_references(self): + self.assertTrue( + any_refer_to( + "/nix/store/bar", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/bar" + ] + }, + ] + ), + ) + def test_no_references(self): + self.assertFalse( + any_refer_to( + "/nix/store/foo", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + ] + ), + ) + +def all_paths(closures): + paths = [] + for closure in closures: + paths.append(closure['path']) + paths.extend(closure['references']) + paths.sort() + return list(set(paths)) + + +class TestAllPaths(unittest.TestCase): + def test_returns_all_paths(self): + self.assertCountEqual( + all_paths([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",] + ) + def test_no_references(self): + self.assertFalse( + any_refer_to( + "/nix/store/foo", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + ] + ), + ) + +# Convert: +# +# [ +# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, +# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] }, +# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] }, +# { path: /nix/store/tux, references: [ /nix/store/tux ] } +# ] +# +# To: +# { +# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ], +# /nix/store/bar: [ /nix/store/baz ], +# /nix/store/baz: [ /nix/store/tux ] }, +# /nix/store/tux: [ ] +# } +# +# Note that it drops self-references to avoid loops. +def make_lookup(closures): + lookup = {} + + for closure in closures: + # paths often self-refer + nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']] + lookup[closure['path']] = nonreferential_paths + + return lookup + +class TestMakeLookup(unittest.TestCase): + def test_returns_lookp(self): + self.assertDictEqual( + make_lookup([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/hello": [ ], + } + ) + +# Convert: +# +# /nix/store/foo with +# { +# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ], +# /nix/store/bar: [ /nix/store/baz ], +# /nix/store/baz: [ /nix/store/tux ] }, +# /nix/store/tux: [ ] +# } +# +# To: +# +# { +# /nix/store/bar: { +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# }, +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +def make_graph_segment_from_root(root, lookup): + children = {} + for ref in lookup[root]: + children[ref] = make_graph_segment_from_root(ref, lookup) + return children + +class TestMakeGraphSegmentFromRoot(unittest.TestCase): + def test_returns_graph(self): + self.assertDictEqual( + make_graph_segment_from_root("/nix/store/foo", { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/tux": [ ], + "/nix/store/hello": [ ], + }), + { + "/nix/store/bar": { + "/nix/store/tux": {} + } + } + ) + def test_returns_graph_tiny(self): + self.assertDictEqual( + make_graph_segment_from_root("/nix/store/tux", { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/tux": [ ], + }), + {} + ) + +# Convert a graph segment in to a popularity-counted dictionary: +# +# From: +# { +# /nix/store/foo: { +# /nix/store/bar: { +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +# } +# +# to: +# [ +# /nix/store/foo: 1 +# /nix/store/bar: 2 +# /nix/store/baz: 4 +# /nix/store/tux: 6 +# ] +def graph_popularity_contest(full_graph): + popularity = defaultdict(int) + for path, subgraph in full_graph.items(): + popularity[path] += 1 + subcontest = graph_popularity_contest(subgraph) + for subpath, subpopularity in subcontest.items(): + popularity[subpath] += subpopularity + 1 + + return popularity + +class TestGraphPopularityContest(unittest.TestCase): + def test_counts_popularity(self): + self.assertDictEqual( + graph_popularity_contest({ + "/nix/store/foo": { + "/nix/store/bar": { + "/nix/store/baz": { + "/nix/store/tux": {} + } + }, + "/nix/store/baz": { + "/nix/store/tux": {} + } + } + }), + { + "/nix/store/foo": 1, + "/nix/store/bar": 2, + "/nix/store/baz": 4, + "/nix/store/tux": 6, + } + ) + +# Emit a list of packages by popularity, most first: +# +# From: +# [ +# /nix/store/foo: 1 +# /nix/store/bar: 1 +# /nix/store/baz: 2 +# /nix/store/tux: 2 +# ] +# +# To: +# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ] +def order_by_popularity(paths): + paths_by_popularity = defaultdict(list) + popularities = [] + for path, popularity in paths.items(): + popularities.append(popularity) + paths_by_popularity[popularity].append(path) + + popularities = list(set(popularities)) + popularities.sort() + + flat_ordered = [] + for popularity in popularities: + paths = paths_by_popularity[popularity] + paths.sort(key=package_name) + + flat_ordered.extend(reversed(paths)) + return list(reversed(flat_ordered)) + + +class TestOrderByPopularity(unittest.TestCase): + def test_returns_in_order(self): + self.assertEqual( + order_by_popularity({ + "/nix/store/foo": 1, + "/nix/store/bar": 1, + "/nix/store/baz": 2, + "/nix/store/tux": 2, + }), + [ + "/nix/store/baz", + "/nix/store/tux", + "/nix/store/bar", + "/nix/store/foo" + ] + ) + +def package_name(path): + parts = path.split('-') + start = parts.pop(0) + # don't throw away any data, so the order is always the same. + # even in cases where only the hash at the start has changed. + parts.append(start) + return '-'.join(parts) + +def main(): + filename = sys.argv[1] + key = sys.argv[2] + + with open(filename) as f: + data = json.load(f) + + # Data comes in as: + # [ + # { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, + # { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] }, + # { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] }, + # { path: /nix/store/tux, references: [ /nix/store/tux ] } + # ] + # + # and we want to get out a list of paths ordered by how universally, + # important they are, ie: tux is referenced by every path, transitively + # so it should be #1 + # + # [ + # /nix/store/tux, + # /nix/store/baz, + # /nix/store/bar, + # /nix/store/foo, + # ] + graph = data[key] + + roots = find_roots(graph); + lookup = make_lookup(graph) + + full_graph = {} + for root in roots: + full_graph[root] = make_graph_segment_from_root(root, lookup) + + ordered = order_by_popularity(graph_popularity_contest(full_graph)) + missing = [] + for path in all_paths(graph): + if path not in ordered: + missing.append(path) + + ordered.extend(missing) + print("\n".join(ordered)) + +if "--test" in sys.argv: + # Don't pass --test otherwise unittest gets mad + unittest.main(argv = [f for f in sys.argv if f != "--test" ]) +else: + main() diff --git a/pkgs/build-support/references-by-popularity/default.nix b/pkgs/build-support/references-by-popularity/default.nix new file mode 100644 index 000000000000..4cae2dcf3ca9 --- /dev/null +++ b/pkgs/build-support/references-by-popularity/default.nix @@ -0,0 +1,15 @@ +{ runCommand, python3, coreutils }: +# Write the references of `path' to a file, in order of how "popular" each +# reference is. Nix 2 only. +path: runCommand "closure-paths" +{ + exportReferencesGraph.graph = path; + __structuredAttrs = true; + PATH = "${coreutils}/bin:${python3}/bin"; + builder = builtins.toFile "builder" + '' + . .attrs.sh + python3 ${./closure-graph.py} .attrs.json graph > ''${outputs[out]} + ''; + } + "" diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index c8d15dbd4063..33a2b6455a23 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -365,6 +365,8 @@ with pkgs; nukeReferences = callPackage ../build-support/nuke-references { }; + referencesByPopularity = callPackage ../build-support/references-by-popularity { }; + removeReferencesTo = callPackage ../build-support/remove-references-to { }; vmTools = callPackage ../build-support/vm { };