From 67a6d344487af252d25001b5c43409b56b33ac9d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 14 Feb 2024 19:07:18 +0100 Subject: [PATCH] GitInputAccessor: Speed up lookup() A command like rm -rf ~/.cache/nix/tarball-cache/ ~/.cache/nix/fetcher-cache-v1.sqlite*; nix flake metadata 'git+file:///home/eelco/Dev/nixpkgs?rev=9463103069725474698139ab10f17a9d125da859' was spending about 84% of its runtime in lookup(), specifically in git_tree_entry_bypath(). (The reading of blobs is less than 3%.) It appears libgit2 doesn't do a lot of caching of trees, so we now make sure that when we look up a path, we add all its parents, and all the immediate children of the parents (since we have them in memory anyway), to our own cache. This speeds up the command above from 17.2s to 7.8s on my machine. Fixes (or at least should improve a lot) #9684. --- src/libfetchers/git-utils.cc | 61 ++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index cb4a84e53..466bdc6c7 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -576,20 +576,61 @@ struct GitInputAccessor : InputAccessor /* Recursively look up 'path' relative to the root. 
*/ git_tree_entry * lookup(const CanonPath & path) { - if (path.isRoot()) return nullptr; - auto i = lookupCache.find(path); - if (i == lookupCache.end()) { - TreeEntry entry; - if (auto err = git_tree_entry_bypath(Setter(entry), root.get(), std::string(path.rel()).c_str())) { - if (err != GIT_ENOTFOUND) - throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); - } + if (i != lookupCache.end()) return i->second.get(); - i = lookupCache.emplace(path, std::move(entry)).first; + auto parent = path.parent(); + if (!parent) return nullptr; + + auto name = path.baseName().value(); + + auto parentTree = lookupTree(*parent); + if (!parentTree) return nullptr; + + auto count = git_tree_entrycount(parentTree->get()); + + git_tree_entry * res = nullptr; + + /* Add all the tree entries to the cache to speed up + subsequent lookups. */ + for (size_t n = 0; n < count; ++n) { + auto entry = git_tree_entry_byindex(parentTree->get(), n); + + TreeEntry copy; + if (git_tree_entry_dup(Setter(copy), entry)) + throw Error("dupping tree entry: %s", git_error_last()->message); + + auto entryName = std::string_view(git_tree_entry_name(entry)); + + if (entryName == name) + res = copy.get(); + + auto path2 = *parent; + path2.push(entryName); + lookupCache.emplace(path2, std::move(copy)).first->second.get(); } - return &*i->second; + return res; + } + + std::optional<Tree> lookupTree(const CanonPath & path) + { + if (path.isRoot()) { + Tree tree; + if (git_tree_dup(Setter(tree), root.get())) + throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message); + return tree; + } + + auto entry = lookup(path); + if (!entry || git_tree_entry_type(entry) != GIT_OBJECT_TREE) + return std::nullopt; + + Tree tree; + if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *repo, entry)) + throw Error("looking up directory '%s': %s", showPath(path), git_error_last()->message); + + return tree; } git_tree_entry * need(const CanonPath & path)