From 2bf0ee3b2b47562a31e8d9f9c8fe903f67dbfe5c Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Tue, 25 Sep 2018 13:54:45 -0400 Subject: [PATCH 1/5] dockertools: tarsum: turn in to a buildInput --- pkgs/build-support/docker/default.nix | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 0cee1dd2916f..6b5a06486e79 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -77,7 +77,9 @@ rec { ln -sT ${docker.src}/components/engine/pkg/tarsum src/github.com/docker/docker/pkg/tarsum go build - cp tarsum $out + mkdir -p $out/bin + + cp tarsum $out/bin/ ''; # buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM @@ -287,7 +289,7 @@ rec { }: runCommand "docker-layer-${name}" { inherit baseJson contents extraCommands; - buildInputs = [ jshon rsync ]; + buildInputs = [ jshon rsync tarsum ]; } '' mkdir layer @@ -314,11 +316,11 @@ rec { # Compute a checksum of the tarball. echo "Computing layer checksum..." - tarsum=$(${tarsum} < $out/layer.tar) + tarhash=$(tarsum < $out/layer.tar) # Add a 'checksum' field to the JSON, with the value set to the # checksum of the tarball. - cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json # Indicate to docker that we're using schema version 1.0. echo -n "1.0" > $out/VERSION @@ -402,8 +404,8 @@ rec { # Compute the tar checksum and add it to the output json. echo "Computing checksum..." - ts=$(${tarsum} < $out/layer.tar) - cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json + tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar) + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json # Indicate to docker that we're using schema version 1.0. 
echo -n "1.0" > $out/VERSION From fd045173cef84e65a8cb133ded28c99167cb0901 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Mon, 24 Sep 2018 16:00:33 -0400 Subject: [PATCH 2/5] referencesByPopularity: init to sort packages by a cachability heuristic Using a simple algorithm, convert the references to a path in to a sorted list of dependent paths based on how often they're referenced and how deep in the tree they live. Equally-"popular" paths are then sorted by name. The existing writeReferencesToFile prints the paths in a simple ascii-based sorting of the paths. Sorting the paths by graph improves the chances that the difference between two builds appears near the end of the list, instead of near the beginning. This makes a difference for Nix builds which export a closure for another program to consume, if that program implements its own level of binary diffing. For example, Docker Images. If each store path is a separate layer then Docker Images can be very efficiently transferred between systems, and we get very good cache reuse between images built with the same version of Nixpkgs. However, since Docker only reliably supports a small number of layers (42) it is important to pick the individual layers carefully. By storing very popular store paths in the first 40 layers, we improve the chances that the next Docker image will share many of those layers.* Given the dependency tree: A - B - C - D -\ \ \ \ \ \ \ \ \ \ \ - E ---- F \- G Nodes which have multiple references are duplicated: A - B - C - D - F \ \ \ \ \ \- E - F \ \ \ \- E - F \ \- G Each leaf node is now replaced by a counter defaulted to 1: A - B - C - D - (F:1) \ \ \ \ \ \- E - (F:1) \ \ \ \- E - (F:1) \ \- (G:1) Then each leaf counter is merged with its parent node, replacing the parent node with a counter of 1, and each existing counter being incremented by 1.
That is to say `- D - (F:1)` becomes `- (D:1, F:2)`: A - B - C - (D:1, F:2) \ \ \ \ \ \- (E:1, F:2) \ \ \ \- (E:1, F:2) \ \- (G:1) Then each leaf counter is merged with its parent node again, merging any counters, then incrementing each: A - B - (C:1, D:2, E:2, F:5) \ \ \ \- (E:1, F:2) \ \- (G:1) And again: A - (B:1, C:2, D:3, E:4, F:8) \ \- (G:1) And again: (A:1, B:2, C:3, D:4, E:5, F:9, G:2) and then paths have the following "popularity": A 1 B 2 C 3 D 4 E 5 F 9 G 2 and the popularity contest would result in the paths being printed as: F E D C B G A * Note: People who have used a Dockerfile before assume Docker's Layers are inherently ordered. However, this is not true -- Docker layers are content-addressable and are not explicitly layered until they are composed in to an Image. --- .../references-by-popularity/closure-graph.py | 520 ++++++++++++++++++ .../references-by-popularity/default.nix | 15 + pkgs/top-level/all-packages.nix | 2 + 3 files changed, 537 insertions(+) create mode 100644 pkgs/build-support/references-by-popularity/closure-graph.py create mode 100644 pkgs/build-support/references-by-popularity/default.nix diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py new file mode 100644 index 000000000000..d67a5dfcf140 --- /dev/null +++ b/pkgs/build-support/references-by-popularity/closure-graph.py @@ -0,0 +1,520 @@ +# IMPORTANT: Making changes? +# +# Validate your changes with python3 ./closure-graph.py --test + + +# Using a simple algorithm, convert the references to a path in to a +# sorted list of dependent paths based on how often they're referenced +# and how deep in the tree they live. Equally-"popular" paths are then +# sorted by name. +# +# The existing writeReferencesToFile prints the paths in a simple +# ascii-based sorting of the paths. 
+# +# Sorting the paths by graph improves the chances that the difference +# between two builds appear near the end of the list, instead of near +# the beginning. This makes a difference for Nix builds which export a +# closure for another program to consume, if that program implements its +# own level of binary diffing. +# +# For an example, Docker Images. If each store path is a separate layer +# then Docker Images can be very efficiently transfered between systems, +# and we get very good cache reuse between images built with the same +# version of Nixpkgs. However, since Docker only reliably supports a +# small number of layers (42) it is important to pick the individual +# layers carefully. By storing very popular store paths in the first 40 +# layers, we improve the chances that the next Docker image will share +# many of those layers.* +# +# Given the dependency tree: +# +# A - B - C - D -\ +# \ \ \ \ +# \ \ \ \ +# \ \ - E ---- F +# \- G +# +# Nodes which have multiple references are duplicated: +# +# A - B - C - D - F +# \ \ \ +# \ \ \- E - F +# \ \ +# \ \- E - F +# \ +# \- G +# +# Each leaf node is now replaced by a counter defaulted to 1: +# +# A - B - C - D - (F:1) +# \ \ \ +# \ \ \- E - (F:1) +# \ \ +# \ \- E - (F:1) +# \ +# \- (G:1) +# +# Then each leaf counter is merged with its parent node, replacing the +# parent node with a counter of 1, and each existing counter being +# incremented by 1. 
That is to say `- D - (F:1)` becomes `- (D:1, F:2)`: +# +# A - B - C - (D:1, F:2) +# \ \ \ +# \ \ \- (E:1, F:2) +# \ \ +# \ \- (E:1, F:2) +# \ +# \- (G:1) +# +# Then each leaf counter is merged with its parent node again, merging +# any counters, then incrementing each: +# +# A - B - (C:1, D:2, E:2, F:5) +# \ \ +# \ \- (E:1, F:2) +# \ +# \- (G:1) +# +# And again: +# +# A - (B:1, C:2, D:3, E:4, F:8) +# \ +# \- (G:1) +# +# And again: +# +# (A:1, B:2, C:3, D:4, E:5, F:9, G:2) +# +# and then paths have the following "popularity": +# +# A 1 +# B 2 +# C 3 +# D 4 +# E 5 +# F 9 +# G 2 +# +# and the popularity contest would result in the paths being printed as: +# +# F +# E +# D +# C +# B +# G +# A +# +# * Note: People who have used a Dockerfile before assume Docker's +# Layers are inherently ordered. However, this is not true -- Docker +# layers are content-addressable and are not explicitly layered until +# they are composed in to an Image. + +import sys +import json +import unittest + +from pprint import pprint +from collections import defaultdict + +# Find paths in the original dataset which are never referenced by +# any other paths +def find_roots(closures): + roots = []; + + for closure in closures: + path = closure['path'] + if not any_refer_to(path, closures): + roots.append(path) + + return roots + +class TestFindRoots(unittest.TestCase): + def test_find_roots(self): + self.assertCountEqual( + find_roots([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + ["/nix/store/foo", "/nix/store/hello"] + ) + + +def any_refer_to(path, closures): + for closure in closures: + if path != closure['path']: + if path in closure['references']: + return True + return False + +class TestAnyReferTo(unittest.TestCase): + def test_has_references(self): + self.assertTrue( + 
any_refer_to( + "/nix/store/bar", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/bar" + ] + }, + ] + ), + ) + def test_no_references(self): + self.assertFalse( + any_refer_to( + "/nix/store/foo", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + ] + ), + ) + +def all_paths(closures): + paths = [] + for closure in closures: + paths.append(closure['path']) + paths.extend(closure['references']) + paths.sort() + return list(set(paths)) + + +class TestAllPaths(unittest.TestCase): + def test_returns_all_paths(self): + self.assertCountEqual( + all_paths([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",] + ) + def test_no_references(self): + self.assertFalse( + any_refer_to( + "/nix/store/foo", + [ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + ] + ), + ) + +# Convert: +# +# [ +# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, +# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] }, +# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] }, +# { path: /nix/store/tux, references: [ /nix/store/tux ] } +# ] +# +# To: +# { +# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ], +# /nix/store/bar: [ /nix/store/baz ], +# /nix/store/baz: [ /nix/store/tux ] }, +# /nix/store/tux: [ ] +# } +# +# Note that it drops self-references to avoid loops. 
+def make_lookup(closures): + lookup = {} + + for closure in closures: + # paths often self-refer + nonreferential_paths = [ref for ref in closure['references'] if ref != closure['path']] + lookup[closure['path']] = nonreferential_paths + + return lookup + +class TestMakeLookup(unittest.TestCase): + def test_returns_lookp(self): + self.assertDictEqual( + make_lookup([ + { + "path": "/nix/store/foo", + "references": [ + "/nix/store/foo", + "/nix/store/bar" + ] + }, + { + "path": "/nix/store/bar", + "references": [ + "/nix/store/bar", + "/nix/store/tux" + ] + }, + { + "path": "/nix/store/hello", + "references": [ + ] + } + ]), + { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/hello": [ ], + } + ) + +# Convert: +# +# /nix/store/foo with +# { +# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ], +# /nix/store/bar: [ /nix/store/baz ], +# /nix/store/baz: [ /nix/store/tux ] }, +# /nix/store/tux: [ ] +# } +# +# To: +# +# { +# /nix/store/bar: { +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# }, +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +def make_graph_segment_from_root(root, lookup): + children = {} + for ref in lookup[root]: + children[ref] = make_graph_segment_from_root(ref, lookup) + return children + +class TestMakeGraphSegmentFromRoot(unittest.TestCase): + def test_returns_graph(self): + self.assertDictEqual( + make_graph_segment_from_root("/nix/store/foo", { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/tux": [ ], + "/nix/store/hello": [ ], + }), + { + "/nix/store/bar": { + "/nix/store/tux": {} + } + } + ) + def test_returns_graph_tiny(self): + self.assertDictEqual( + make_graph_segment_from_root("/nix/store/tux", { + "/nix/store/foo": [ "/nix/store/bar" ], + "/nix/store/bar": [ "/nix/store/tux" ], + "/nix/store/tux": [ ], + }), + {} + ) + +# Convert a graph segment in to a popularity-counted dictionary: +# +# From: +# { +# /nix/store/foo: { +# 
/nix/store/bar: { +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +# /nix/store/baz: { +# /nix/store/tux: {} +# } +# } +# } +# +# to: +# [ +# /nix/store/foo: 1 +# /nix/store/bar: 2 +# /nix/store/baz: 4 +# /nix/store/tux: 6 +# ] +def graph_popularity_contest(full_graph): + popularity = defaultdict(int) + for path, subgraph in full_graph.items(): + popularity[path] += 1 + subcontest = graph_popularity_contest(subgraph) + for subpath, subpopularity in subcontest.items(): + popularity[subpath] += subpopularity + 1 + + return popularity + +class TestGraphPopularityContest(unittest.TestCase): + def test_counts_popularity(self): + self.assertDictEqual( + graph_popularity_contest({ + "/nix/store/foo": { + "/nix/store/bar": { + "/nix/store/baz": { + "/nix/store/tux": {} + } + }, + "/nix/store/baz": { + "/nix/store/tux": {} + } + } + }), + { + "/nix/store/foo": 1, + "/nix/store/bar": 2, + "/nix/store/baz": 4, + "/nix/store/tux": 6, + } + ) + +# Emit a list of packages by popularity, most first: +# +# From: +# [ +# /nix/store/foo: 1 +# /nix/store/bar: 1 +# /nix/store/baz: 2 +# /nix/store/tux: 2 +# ] +# +# To: +# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ] +def order_by_popularity(paths): + paths_by_popularity = defaultdict(list) + popularities = [] + for path, popularity in paths.items(): + popularities.append(popularity) + paths_by_popularity[popularity].append(path) + + popularities = list(set(popularities)) + popularities.sort() + + flat_ordered = [] + for popularity in popularities: + paths = paths_by_popularity[popularity] + paths.sort(key=package_name) + + flat_ordered.extend(reversed(paths)) + return list(reversed(flat_ordered)) + + +class TestOrderByPopularity(unittest.TestCase): + def test_returns_in_order(self): + self.assertEqual( + order_by_popularity({ + "/nix/store/foo": 1, + "/nix/store/bar": 1, + "/nix/store/baz": 2, + "/nix/store/tux": 2, + }), + [ + "/nix/store/baz", + "/nix/store/tux", + "/nix/store/bar", + "/nix/store/foo" + ] + ) 
+ +def package_name(path): + parts = path.split('-') + start = parts.pop(0) + # don't throw away any data, so the order is always the same. + # even in cases where only the hash at the start has changed. + parts.append(start) + return '-'.join(parts) + +def main(): + filename = sys.argv[1] + key = sys.argv[2] + + with open(filename) as f: + data = json.load(f) + + # Data comes in as: + # [ + # { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, + # { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] }, + # { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] }, + # { path: /nix/store/tux, references: [ /nix/store/tux ] } + # ] + # + # and we want to get out a list of paths ordered by how universally, + # important they are, ie: tux is referenced by every path, transitively + # so it should be #1 + # + # [ + # /nix/store/tux, + # /nix/store/baz, + # /nix/store/bar, + # /nix/store/foo, + # ] + graph = data[key] + + roots = find_roots(graph); + lookup = make_lookup(graph) + + full_graph = {} + for root in roots: + full_graph[root] = make_graph_segment_from_root(root, lookup) + + ordered = order_by_popularity(graph_popularity_contest(full_graph)) + missing = [] + for path in all_paths(graph): + if path not in ordered: + missing.append(path) + + ordered.extend(missing) + print("\n".join(ordered)) + +if "--test" in sys.argv: + # Don't pass --test otherwise unittest gets mad + unittest.main(argv = [f for f in sys.argv if f != "--test" ]) +else: + main() diff --git a/pkgs/build-support/references-by-popularity/default.nix b/pkgs/build-support/references-by-popularity/default.nix new file mode 100644 index 000000000000..4cae2dcf3ca9 --- /dev/null +++ b/pkgs/build-support/references-by-popularity/default.nix @@ -0,0 +1,15 @@ +{ runCommand, python3, coreutils }: +# Write the references of `path' to a file, in order of how "popular" each +# reference is. Nix 2 only. 
+path: runCommand "closure-paths" +{ + exportReferencesGraph.graph = path; + __structuredAttrs = true; + PATH = "${coreutils}/bin:${python3}/bin"; + builder = builtins.toFile "builder" + '' + . .attrs.sh + python3 ${./closure-graph.py} .attrs.json graph > ''${outputs[out]} + ''; + } + "" diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 500a713f22ba..f90c9c254459 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -366,6 +366,8 @@ with pkgs; nukeReferences = callPackage ../build-support/nuke-references { }; + referencesByPopularity = callPackage ../build-support/references-by-popularity { }; + removeReferencesTo = callPackage ../build-support/remove-references-to { }; vmTools = callPackage ../build-support/vm { }; From 4fe900619080f0b1a804abbccaab500d819ead10 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Tue, 25 Sep 2018 10:53:42 -0400 Subject: [PATCH 3/5] dockerTools.buildLayeredImage: init Create a many-layered Docker Image. Implements much less than buildImage: - Doesn't support specific uids/gids - Doesn't support running commands after building - Doesn't require qemu - Doesn't create mutable copies of the files in the path - Doesn't support parent images If you want those features, I recommend using buildLayeredImage as an input to buildImage. Notably, it does support: - Caching low level, common paths based on a graph traversal algorithm, see referencesByPopularity in 0a80233487993256e811f566b1c80a40394c03d6 - Configurable number of layers. If you're not using AUFS or not extending the image, you can specify a larger number of layers at build time: pkgs.dockerTools.buildLayeredImage { name = "hello"; maxLayers = 128; config.Cmd = [ "${pkgs.gitFull}/bin/git" ]; }; - Parallelized creation of the layers, improving build speed. - The contents of the image include the closure of the configuration, so you don't have to specify paths in contents and config.
With buildImage, paths referred to by the config were not included automatically in the image. Thus, if you wanted to call Git, you had to specify it twice: pkgs.dockerTools.buildImage { name = "hello"; contents = [ pkgs.gitFull ]; config.Cmd = [ "${pkgs.gitFull}/bin/git" ]; }; buildLayeredImage on the other hand includes the runtime closure of the config when calculating the contents of the image: pkgs.dockerTools.buildLayeredImage { name = "hello"; config.Cmd = [ "${pkgs.gitFull}/bin/git" ]; }; Minor Problems - If any of the store paths change, every layer will be rebuilt in the nix-build. However, because the layers are bit-for-bit reproducible, when these images are loaded in to Docker they will match existing layers and not be imported or uploaded twice. Common Questions - Aren't Docker layers ordered? No. People who have used a Dockerfile before assume Docker's Layers are inherently ordered. However, this is not true -- Docker layers are content-addressable and are not explicitly layered until they are composed in to an Image. - What happens if I have more than maxLayers of store paths? The first (maxLayers-2) most "popular" paths will have their own individual layers, then layer #(maxLayers-1) will contain all the remaining "unpopular" paths, and finally layer #(maxLayers) will contain the Image configuration. --- doc/functions.xml | 177 ++++++++++++++++++ pkgs/build-support/docker/default.nix | 175 +++++++++++++++++ .../docker/store-path-to-layer.sh | 24 +++ 3 files changed, 376 insertions(+) create mode 100755 pkgs/build-support/docker/store-path-to-layer.sh diff --git a/doc/functions.xml b/doc/functions.xml index 0c0d82b0342c..c4cbf0adb203 100644 --- a/doc/functions.xml +++ b/doc/functions.xml @@ -682,6 +682,183 @@ hello latest de2bf4786de6 About a minute ago 25.2MB +
+ buildLayeredImage + + + Create a Docker image with many of the store paths being on their own layer + to improve sharing between images. + + + + + + name + + + + The name of the resulting image. + + + + + + tag optional + + + + Tag of the generated image. + + + Default: the output path's hash + + + + + + contents optional + + + + Top level paths in the container. Either a single derivation, or a list + of derivations. + + + Default: [] + + + + + + config optional + + + + Run-time configuration of the container. A full list of the options are + available at in the + + Docker Image Specification v1.2.0 . + + + Default: {} + + + + + + created optional + + + + Date and time the layers were created. Follows the same + now exception supported by + buildImage. + + + Default: 1970-01-01T00:00:01Z + + + + + + maxLayers optional + + + + Maximum number of layers to create. + + + Default: 24 + + + + + +
+ Behavior of <varname>contents</varname> in the final image + + + Each path directly listed in contents will have a + symlink in the root of the image. + + + + For example: + + will create symlinks for all the paths in the hello + package: + /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello +/share/info/hello.info -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/hello.info +/share/locale/bg/LC_MESSAGES/hello.mo -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/hello.mo +]]> + +
+ +
+ Automatic inclusion of <varname>config</varname> references + + + The closure of config is automatically included in the + closure of the final image. + + + + This is different from pkgs.dockerTools.buildImage + which does not automatically include the + configuration's closure. + + + + This allows you to make very simple Docker images with very little code. + This container will start up and run hello: + + +
+ +
+ Adjusting <varname>maxLayers</varname> + + + Increasing the maxLayers increases the number of layers + which have a chance to be shared between different images. + + + + Modern Docker installations support up to 128 layers, however older + versions support as few as 42. + + + + If the produced image will not be extended by other Docker builds, it is + safe to set maxLayers to 128. + However it will be impossible to extend the image further. + + + + The first (maxLayers-2) most "popular" paths will have + their own individual layers, then layer #maxLayers-1 + will contain all the remaining "unpopular" paths, and finally layer + #maxLayers will contain the Image configuration. + + + + Docker's Layers are not inherently ordered, they are content-addressable + and are not explicitly layered until they are composed in to an Image. + +
+
+
pullImage diff --git a/pkgs/build-support/docker/default.nix b/pkgs/build-support/docker/default.nix index 6b5a06486e79..73639a521b6a 100644 --- a/pkgs/build-support/docker/default.nix +++ b/pkgs/build-support/docker/default.nix @@ -1,4 +1,5 @@ { + symlinkJoin, coreutils, docker, e2fsprogs, @@ -19,6 +20,7 @@ utillinux, vmTools, writeReferencesToFile, + referencesByPopularity, writeScript, writeText, }: @@ -272,6 +274,81 @@ rec { perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths ''; + # Create $maxLayers worth of Docker Layers, one layer per store path + # unless there are more paths than $maxLayers. In that case, create + # $maxLayers-1 for the most popular layers, and smush the remainaing + # store paths in to one final layer. + mkManyPureLayers = { + name, + # Files to add to the layer. + closure, + configJson, + # Docker has a 42-layer maximum, we pick 24 to ensure there is plenty + # of room for extension + maxLayers ? 24 + }: + runCommand "${name}-granular-docker-layers" { + inherit maxLayers; + paths = referencesByPopularity closure; + buildInputs = [ jshon rsync tarsum ]; + enableParallelBuilding = true; + } + '' + # Delete impurities for store path layers, so they don't get + # shared and taint other projects. + cat ${configJson} \ + | jshon -d config \ + | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json + + # WARNING! + # The following code is fiddly w.r.t. ensuring every layer is + # created, and that no paths are missed. 
If you change the + # following head and tail call lines, double-check that your + # code behaves properly when the number of layers equals: + # maxLayers-1, maxLayers, and maxLayers+1 + head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh} + if [ $(cat $paths | wc -l) -ge $maxLayers ]; then + tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers + fi + + echo "Finished building layer '$name'" + + mv ./layers $out + ''; + + # Create a "Customisation" layer which adds symlinks at the root of + # the image to the root paths of the closure. Also add the config + # data like what command to run and the environment to run it in. + mkCustomisationLayer = { + name, + # Files to add to the layer. + contents, + baseJson, + uid ? 0, gid ? 0, + }: + runCommand "${name}-customisation-layer" { + buildInputs = [ jshon rsync tarsum ]; + } + '' + cp -r ${contents}/ ./layer + + # Tar up the layer and throw it into 'layer.tar'. + echo "Packing layer..." + mkdir $out + tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar . + + # Compute a checksum of the tarball. + echo "Computing layer checksum..." + tarhash=$(tarsum < $out/layer.tar) + + # Add a 'checksum' field to the JSON, with the value set to the + # checksum of the tarball. + cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json + + # Indicate to docker that we're using schema version 1.0. + echo -n "1.0" > $out/VERSION + ''; + # Create a "layer" (set of files). mkPureLayer = { # Name of the layer @@ -413,6 +490,104 @@ rec { ''; }; + buildLayeredImage = { + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Docker config; e.g. what command to run on the container. + config ? {}, + # Time of creation of the image. 
Passing "now" will make the + # created date be the time of building. + created ? "1970-01-01T00:00:01Z", + # Docker's lowest maximum layer limit is 42-layers for an old + # version of the AUFS graph driver. We pick 24 to ensure there is + # plenty of room for extension. I believe the actual maximum is + # 128. + maxLayers ? 24 + }: + let + uid = 0; + gid = 0; + baseName = baseNameOf name; + contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); }; + + configJson = let + pure = writeText "${baseName}-config.json" (builtins.toJSON { + inherit created config; + architecture = "amd64"; + os = "linux"; + }); + impure = runCommand "${baseName}-standard-dynamic-date.json" + { buildInputs = [ jq ]; } + '' + jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out + ''; + in if created == "now" then impure else pure; + + bulkLayers = mkManyPureLayers { + name = baseName; + closure = writeText "closure" "${contentsEnv} ${configJson}"; + # One layer will be taken up by the customisationLayer, so + # take up one less. + maxLayers = maxLayers - 1; + inherit configJson; + }; + customisationLayer = mkCustomisationLayer { + name = baseName; + contents = contentsEnv; + baseJson = configJson; + inherit uid gid; + }; + result = runCommand "docker-image-${baseName}.tar.gz" { + buildInputs = [ jshon pigz coreutils findutils jq ]; + # Image name and tag must be lowercase + imageName = lib.toLower name; + imageTag = if tag == null then "" else lib.toLower tag; + baseJson = configJson; + } '' + ${lib.optionalString (tag == null) '' + outName="$(basename "$out")" + outHash=$(echo "$outName" | cut -d - -f 1) + + imageTag=$outHash + ''} + + find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list + echo ${customisationLayer} >> layer-list + + mkdir image + imageJson=$(cat ${configJson} | jq ". 
+ {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}") + manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]") + for layer in $(cat layer-list); do + layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1) + layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1) + ln -s "$layer" "./image/$layerID" + + manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .") + imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .") + imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .") + done + imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1) + echo "$imageJson" > "image/$imageJsonChecksum.json" + manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"") + echo "$manifestJson" > image/manifest.json + + jshon -n object \ + -n object -s "$layerID" -i "$imageTag" \ + -i "$imageName" > image/repositories + + echo "Cooking the image..." + tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out + + echo "Finished." + ''; + + in + result; + # 1. extract the base image # 2. create the layer # 3. add layer deps to the layer itself, diffing with the base image diff --git a/pkgs/build-support/docker/store-path-to-layer.sh b/pkgs/build-support/docker/store-path-to-layer.sh new file mode 100755 index 000000000000..ff814c1f6130 --- /dev/null +++ b/pkgs/build-support/docker/store-path-to-layer.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -eu + +layerNumber=$1 +shift + +layerPath="./layers/$layerNumber" +echo "Creating layer #$layerNumber for $@" + +mkdir -p "$layerPath" +tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \ + --mtime="@$SOURCE_DATE_EPOCH" \ + --owner=0 --group=0 "$@" + +# Compute a checksum of the tarball. 
+tarhash=$(tarsum < $layerPath/layer.tar) + +# Add a 'checksum' field to the JSON, with the value set to the +# checksum of the tarball. +cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json + +# Indicate to docker that we're using schema version 1.0. +echo -n "1.0" > $layerPath/VERSION From d1e46df24bae37d69e6eda09bee933f22acbe0ce Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 27 Sep 2018 08:14:04 -0400 Subject: [PATCH 4/5] fixup: drop comment about config behaving differently from buildImage --- doc/functions.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/functions.xml b/doc/functions.xml index c4cbf0adb203..8223a8b0531c 100644 --- a/doc/functions.xml +++ b/doc/functions.xml @@ -808,12 +808,6 @@ pkgs.dockerTools.buildLayeredImage { closure of the final image. - - This is different from pkgs.dockerTools.buildImage - which does not automatically include the - configuration's closure. - - This allows you to make very simple Docker images with very little code. This container will start up and run hello: From fb2d153dac13f37e2b71811aa8600f99b758a73e Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 27 Sep 2018 14:16:23 -0400 Subject: [PATCH 5/5] dockerTools: test buildLayeredImage --- nixos/tests/docker-tools.nix | 4 ++++ pkgs/build-support/docker/examples.nix | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/nixos/tests/docker-tools.nix b/nixos/tests/docker-tools.nix index 5a7590cbf364..360b32faae72 100644 --- a/nixos/tests/docker-tools.nix +++ b/nixos/tests/docker-tools.nix @@ -58,5 +58,9 @@ import ./make-test.nix ({ pkgs, ... 
}: { # Ensure Docker images can use an unstable date $docker->succeed("docker load --input='${pkgs.dockerTools.examples.bash}'"); $docker->succeed("[ '1970-01-01T00:00:01Z' != \"\$(docker inspect ${pkgs.dockerTools.examples.unstableDate.imageName} | ${pkgs.jq}/bin/jq -r .[].Created)\" ]"); + + # Ensure Layered Docker images work + $docker->succeed("docker load --input='${pkgs.dockerTools.examples.layered-image}'"); + $docker->succeed("docker run --rm ${pkgs.dockerTools.examples.layered-image.imageName}"); ''; }) diff --git a/pkgs/build-support/docker/examples.nix b/pkgs/build-support/docker/examples.nix index 822e0dbb31f2..003e7429a81b 100644 --- a/pkgs/build-support/docker/examples.nix +++ b/pkgs/build-support/docker/examples.nix @@ -150,4 +150,11 @@ rec { contents = [ pkgs.coreutils ]; created = "now"; }; + + # 10. Create a layered image + layered-image = pkgs.dockerTools.buildLayeredImage { + name = "layered-image"; + tag = "latest"; + config.Cmd = [ "${pkgs.hello}/bin/hello" ]; + }; }