diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc index 0ebd750f6..35ce0ac36 100644 --- a/src/libutil/fs-sink.cc +++ b/src/libutil/fs-sink.cc @@ -5,6 +5,52 @@ namespace nix { +void copyRecursive( + SourceAccessor & accessor, const CanonPath & from, + FileSystemObjectSink & sink, const Path & to) +{ + auto stat = accessor.lstat(from); + + switch (stat.type) { + case SourceAccessor::tSymlink: + { + sink.createSymlink(to, accessor.readLink(from)); + break; + } + + case SourceAccessor::tRegular: + { + sink.createRegularFile(to, [&](CreateRegularFileSink & crf) { + if (stat.isExecutable) + crf.isExecutable(); + accessor.readFile(from, crf, [&](uint64_t size) { + crf.preallocateContents(size); + }); + }); + break; + } + + case SourceAccessor::tDirectory: + { + sink.createDirectory(to); + for (auto & [name, _] : accessor.readDirectory(from)) { + copyRecursive( + accessor, from / name, + sink, to + "/" + name); + break; + } + break; + } + + case SourceAccessor::tMisc: + throw Error("file '%1%' has an unsupported type", from); + + default: + abort(); + } +} + + struct RestoreSinkSettings : Config { Setting preallocateContents{this, false, "preallocate-contents", diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index 670b55c2b..ae577819a 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -41,6 +41,13 @@ struct FileSystemObjectSink virtual void createSymlink(const Path & path, const std::string & target) = 0; }; +/** + * Recursively copy file system objects from the source into the sink. + */ +void copyRecursive( + SourceAccessor & accessor, const CanonPath & sourcePath, + FileSystemObjectSink & sink, const Path & destPath); + /** * Ignore everything and do nothing */ diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 029e1af44..5733531fa 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -5,13 +5,302 @@ #include #include // for strcasecmp +#include "signals.hh" +#include "config.hh" +#include "hash.hh" +#include "posix-source-accessor.hh" + #include "git.hh" +#include "serialise.hh" namespace nix::git { using namespace nix; using namespace std::string_literals; +std::optional decodeMode(RawMode m) { + switch (m) { + case (RawMode) Mode::Directory: + case (RawMode) Mode::Executable: + case (RawMode) Mode::Regular: + case (RawMode) Mode::Symlink: + return (Mode) m; + default: + return std::nullopt; + } +} + + +static std::string getStringUntil(Source & source, char byte) +{ + std::string s; + char n[1]; + source(std::string_view { n, 1 }); + while (*n != byte) { + s += *n; + source(std::string_view { n, 1 }); + } + return s; +} + + +static std::string getString(Source & source, int n) +{ + std::string v; + v.resize(n); + source(v); + return v; +} + +void parseBlob( + FileSystemObjectSink & sink, + const Path & sinkPath, + Source & source, + bool executable, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + sink.createRegularFile(sinkPath, [&](auto & crf) { + if (executable) + crf.isExecutable(); + + unsigned long long size = std::stoi(getStringUntil(source, 0)); + + crf.preallocateContents(size); + + unsigned long long left = size; + std::string buf; + buf.reserve(65536); + + while (left) { + checkInterrupt(); + buf.resize(std::min((unsigned long long)buf.capacity(), left)); + source(buf); + crf(buf); + left -= buf.size(); + } + }); +} + +void parseTree( + FileSystemObjectSink & sink, + const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings) +{ + unsigned long long size = std::stoi(getStringUntil(source, 0)); + unsigned long long left = size; + + sink.createDirectory(sinkPath); + + while (left) { + std::string perms = getStringUntil(source, ' '); + left -= perms.size(); + left -= 1; + + RawMode rawMode = std::stoi(perms, 0, 8); + auto modeOpt = decodeMode(rawMode); + if (!modeOpt) + throw Error("Unknown Git permission: %o", perms); + auto mode = std::move(*modeOpt); + + std::string name = getStringUntil(source, '\0'); + left -= name.size(); + left -= 1; + + std::string hashs = getString(source, 20); + left -= 20; + + Hash hash(HashAlgorithm::SHA1); + std::copy(hashs.begin(), hashs.end(), hash.hash); + + hook(name, TreeEntry { + .mode = mode, + .hash = hash, + }); + } +} + +ObjectType parseObjectType( + Source & source, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + auto type = getString(source, 5); + + if (type == "blob ") { + return ObjectType::Blob; + } else if (type == "tree ") { + return ObjectType::Tree; + } else throw Error("input doesn't look like a Git object"); +} + +void parse( + FileSystemObjectSink & sink, + const Path & sinkPath, + Source & source, + bool executable, + std::function hook, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + auto type = parseObjectType(source, xpSettings); + + switch (type) { + case ObjectType::Blob: + parseBlob(sink, sinkPath, source, executable, xpSettings); + break; + case ObjectType::Tree: + parseTree(sink, sinkPath, source, hook, xpSettings); + break; + default: + assert(false); + }; +} + + +std::optional convertMode(SourceAccessor::Type type) +{ + switch (type) { + case SourceAccessor::tSymlink: return Mode::Symlink; + case SourceAccessor::tRegular: return Mode::Regular; + case SourceAccessor::tDirectory: return Mode::Directory; + case SourceAccessor::tMisc: return std::nullopt; + default: abort(); + } +} + + +void restore(FileSystemObjectSink & sink, Source & source, std::function hook) +{ + parse(sink, "", source, false, [&](Path name, TreeEntry entry) { + auto [accessor, from] = hook(entry.hash); + auto stat = accessor->lstat(from); + auto gotOpt = convertMode(stat.type); + if (!gotOpt) + throw Error("file '%s' (git hash %s) has an unsupported type", + from, + entry.hash.to_string(HashFormat::Base16, false)); + auto & got = *gotOpt; + if (got != entry.mode) + throw Error("git mode of file '%s' (git hash %s) is %o but expected %o", + from, + entry.hash.to_string(HashFormat::Base16, false), + (RawMode) got, + (RawMode) entry.mode); + copyRecursive( + *accessor, from, + sink, name); + }); +} + + +void dumpBlobPrefix( + uint64_t size, Sink & sink, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + auto s = fmt("blob %d\0"s, std::to_string(size)); + sink(s); +} + + +void dumpTree(const Tree & entries, Sink & sink, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + std::string v1; + + for (auto & [name, entry] : entries) { + auto name2 = name; + if (entry.mode == Mode::Directory) { + assert(name2.back() == '/'); + name2.pop_back(); + } + v1 += fmt("%o %s\0"s, static_cast(entry.mode), name2); + std::copy(entry.hash.hash, entry.hash.hash + entry.hash.hashSize, std::back_inserter(v1)); + } + + { + auto s = fmt("tree %d\0"s, v1.size()); + sink(s); + } + + sink(v1); +} + + +Mode dump( + SourceAccessor & accessor, const CanonPath & path, + Sink & sink, + std::function hook, + PathFilter & filter, + const ExperimentalFeatureSettings & xpSettings) +{ + auto st = accessor.lstat(path); + + switch (st.type) { + case SourceAccessor::tRegular: + { + accessor.readFile(path, sink, [&](uint64_t size) { + dumpBlobPrefix(size, sink, xpSettings); + }); + return st.isExecutable + ? Mode::Executable + : Mode::Regular; + } + + case SourceAccessor::tDirectory: + { + Tree entries; + for (auto & [name, _] : accessor.readDirectory(path)) { + auto child = path / name; + if (!filter(child.abs())) continue; + + auto entry = hook(child); + + auto name2 = name; + if (entry.mode == Mode::Directory) + name2 += "/"; + + entries.insert_or_assign(std::move(name2), std::move(entry)); + } + dumpTree(entries, sink, xpSettings); + return Mode::Directory; + } + + case SourceAccessor::tSymlink: + case SourceAccessor::tMisc: + default: + throw Error("file '%1%' has an unsupported type", path); + } +} + + +TreeEntry dumpHash( + HashAlgorithm ha, + SourceAccessor & accessor, const CanonPath & path, PathFilter & filter) +{ + std::function hook; + hook = [&](const CanonPath & path) -> TreeEntry { + auto hashSink = HashSink(ha); + auto mode = dump(accessor, path, hashSink, hook, filter); + auto hash = hashSink.finish().first; + return { + .mode = mode, + .hash = hash, + }; + }; + + return hook(path); +} + + std::optional parseLsRemoteLine(std::string_view line) { const static std::regex line_regex("^(ref: *)?([^\\s]+)(?:\\t+(.*))?$"); diff --git a/src/libutil/git.hh b/src/libutil/git.hh index dea351929..d9eb138e1 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -5,8 +5,160 @@ #include #include +#include "types.hh" +#include "serialise.hh" +#include "hash.hh" +#include "source-accessor.hh" +#include "fs-sink.hh" + namespace nix::git { +enum struct ObjectType { + Blob, + Tree, + //Commit, + //Tag, +}; + +using RawMode = uint32_t; + +enum struct Mode : RawMode { + Directory = 0040000, + Regular = 0100644, + Executable = 0100755, + Symlink = 0120000, +}; + +std::optional decodeMode(RawMode m); + +/** + * An anonymous Git tree object entry (no name part). + */ +struct TreeEntry +{ + Mode mode; + Hash hash; + + GENERATE_CMP(TreeEntry, me->mode, me->hash); +}; + +/** + * A Git tree object, fully decoded and stored in memory. + * + * Directory names must end in a `/` for sake of sorting. See + * https://github.com/mirage/irmin/issues/352 + */ +using Tree = std::map; + +/** + * Callback for processing a child hash with `parse` + * + * The function should + * + * 1. Obtain the file system objects denoted by `gitHash` + * + * 2. Ensure they match `mode` + * + * 3. Feed them into the same sink `parse` was called with + * + * Implementations may seek to memoize resources (bandwidth, storage, + * etc.) for the same Git hash. + */ +using SinkHook = void(const Path & name, TreeEntry entry); + +/** + * Parse the "blob " or "tree " prefix. + * + * @throws if prefix not recognized + */ +ObjectType parseObjectType( + Source & source, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +void parseBlob( + FileSystemObjectSink & sink, const Path & sinkPath, + Source & source, + bool executable, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +void parseTree( + FileSystemObjectSink & sink, const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Helper putting the previous three `parse*` functions together. + */ +void parse( + FileSystemObjectSink & sink, const Path & sinkPath, + Source & source, + bool executable, + std::function hook, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Assists with writing a `SinkHook` step (2). + */ +std::optional convertMode(SourceAccessor::Type type); + +/** + * Simplified version of `SinkHook` for `restore`. + * + * Given a `Hash`, return a `SourceAccessor` and `CanonPath` pointing to + * the file system object with that path. + */ +using RestoreHook = std::pair(Hash); + +/** + * Wrapper around `parse` and `RestoreSink` + */ +void restore(FileSystemObjectSink & sink, Source & source, std::function hook); + +/** + * Dumps a single file to a sink + * + * @param xpSettings for testing purposes + */ +void dumpBlobPrefix( + uint64_t size, Sink & sink, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Dumps a representation of a git tree to a sink + */ +void dumpTree( + const Tree & entries, Sink & sink, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Callback for processing a child with `dump` + * + * The function should return the Git hash and mode of the file at the + * given path in the accessor passed to `dump`. + * + * Note that if the child is a directory, its child in must also be so + * processed in order to compute this information. + */ +using DumpHook = TreeEntry(const CanonPath & path); + +Mode dump( + SourceAccessor & accessor, const CanonPath & path, + Sink & sink, + std::function hook, + PathFilter & filter = defaultPathFilter, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Recursively dumps path, hashing as we go. + * + * A smaller wrapper around `dump`. + */ +TreeEntry dumpHash( + HashAlgorithm ha, + SourceAccessor & accessor, const CanonPath & path, + PathFilter & filter = defaultPathFilter); + /** * A line from the output of `git ls-remote --symref`. * diff --git a/tests/unit/libutil/git.cc b/tests/unit/libutil/git.cc index 73bbd049e..76ef86bcf 100644 --- a/tests/unit/libutil/git.cc +++ b/tests/unit/libutil/git.cc @@ -9,6 +9,211 @@ namespace nix { using namespace git; +class GitTest : public CharacterizationTest +{ + Path unitTestData = getUnitTestData() + "/git"; + +public: + + Path goldenMaster(std::string_view testStem) const override { + return unitTestData + "/" + testStem; + } + + /** + * We set these in tests rather than the regular globals so we don't have + * to worry about race conditions if the tests run concurrently. + */ + ExperimentalFeatureSettings mockXpSettings; + +private: + + void SetUp() override + { + mockXpSettings.set("experimental-features", "git-hashing"); + } +}; + +TEST(GitMode, gitMode_directory) { + Mode m = Mode::Directory; + RawMode r = 0040000; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_executable) { + Mode m = Mode::Executable; + RawMode r = 0100755; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_regular) { + Mode m = Mode::Regular; + RawMode r = 0100644; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_symlink) { + Mode m = Mode::Symlink; + RawMode r = 0120000; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST_F(GitTest, blob_read) { + readTest("hello-world-blob.bin", [&](const auto & encoded) { + StringSource in { encoded }; + StringSink out; + RegularFileSink out2 { out }; + ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob); + parseBlob(out2, "", in, false, mockXpSettings); + + auto expected = readFile(goldenMaster("hello-world.bin")); + + ASSERT_EQ(out.s, expected); + }); +} + +TEST_F(GitTest, blob_write) { + writeTest("hello-world-blob.bin", [&]() { + auto decoded = readFile(goldenMaster("hello-world.bin")); + StringSink s; + dumpBlobPrefix(decoded.size(), s, mockXpSettings); + s(decoded); + return s.s; + }); +} + +/** + * This data is for "shallow" tree tests. However, we use "real" hashes + * so that we can check our test data in a small shell script test test + * (`tests/unit/libutil/data/git/check-data.sh`). + */ +const static Tree tree = { + { + "Foo", + { + .mode = Mode::Regular, + // hello world with special chars from above + .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1), + }, + }, + { + "bAr", + { + .mode = Mode::Executable, + // ditto + .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1), + }, + }, + { + "baZ/", + { + .mode = Mode::Directory, + // Empty directory hash + .hash = Hash::parseAny("4b825dc642cb6eb9a060e54bf8d69288fbee4904", HashAlgorithm::SHA1), + }, + }, +}; + +TEST_F(GitTest, tree_read) { + readTest("tree.bin", [&](const auto & encoded) { + StringSource in { encoded }; + NullFileSystemObjectSink out; + Tree got; + ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Tree); + parseTree(out, "", in, [&](auto & name, auto entry) { + auto name2 = name; + if (entry.mode == Mode::Directory) + name2 += '/'; + got.insert_or_assign(name2, std::move(entry)); + }, mockXpSettings); + + ASSERT_EQ(got, tree); + }); +} + +TEST_F(GitTest, tree_write) { + writeTest("tree.bin", [&]() { + StringSink s; + dumpTree(tree, s, mockXpSettings); + return s.s; + }); +} + +TEST_F(GitTest, both_roundrip) { + using File = MemorySourceAccessor::File; + + MemorySourceAccessor files; + files.root = File::Directory { + .contents { + { + "foo", + File::Regular { + .contents = "hello\n\0\n\tworld!", + }, + }, + { + "bar", + File::Directory { + .contents = { + { + "baz", + File::Regular { + .executable = true, + .contents = "good day,\n\0\n\tworld!", + }, + }, + }, + }, + }, + }, + }; + + std::map cas; + + std::function dumpHook; + dumpHook = [&](const CanonPath & path) { + StringSink s; + HashSink hashSink { HashAlgorithm::SHA1 }; + TeeSink s2 { s, hashSink }; + auto mode = dump( + files, path, s2, dumpHook, + defaultPathFilter, mockXpSettings); + auto hash = hashSink.finish().first; + cas.insert_or_assign(hash, std::move(s.s)); + return TreeEntry { + .mode = mode, + .hash = hash, + }; + }; + + auto root = dumpHook(CanonPath::root); + + MemorySourceAccessor files2; + + MemorySink sinkFiles2 { files2 }; + + std::function mkSinkHook; + mkSinkHook = [&](auto prefix, auto & hash, auto executable) { + StringSource in { cas[hash] }; + parse( + sinkFiles2, prefix, in, executable, + [&](const Path & name, const auto & entry) { + mkSinkHook( + prefix + "/" + name, + entry.hash, + entry.mode == Mode::Executable); + }, + mockXpSettings); + }; + + mkSinkHook("", root.hash, false); + + ASSERT_EQ(files, files2); +} + TEST(GitLsRemote, parseSymrefLineWithReference) { auto line = "ref: refs/head/main HEAD"; auto res = parseLsRemoteLine(line);