Handle tarballs that don't consist of a single top-level directory

Fixes #4785 (top-level directories are no longer merged into one).

Fixes #10983 (top-level non-directories are no longer discarded).
This commit is contained in:
Eelco Dolstra 2024-07-26 20:24:58 +02:00
parent d9ba2a1634
commit 06b686b62d
2 changed files with 82 additions and 20 deletions

View File

@ -126,7 +126,7 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
} }
template<typename T> template<typename T>
T peelObject(git_repository * repo, git_object * obj, git_object_t type) T peelObject(git_object * obj, git_object_t type)
{ {
T obj2; T obj2;
if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) { if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) {
@ -136,6 +136,29 @@ T peelObject(git_repository * repo, git_object * obj, git_object_t type)
return obj2; return obj2;
} }
template<typename T>
T dupObject(typename T::pointer obj)
{
T obj2;
if (git_object_dup((git_object * *) (typename T::pointer *) Setter(obj2), (git_object *) obj))
throw Error("duplicating object '%s': %s", *git_object_id((git_object *) obj), git_error_last()->message);
return obj2;
}
/**
* Peel the specified object (i.e. follow tag and commit objects) to
* either a blob or a tree.
*/
static Object peelToTreeOrBlob(git_object * obj)
{
/* git_object_peel() doesn't handle blob objects, so handle those
specially. */
if (git_object_type(obj) == GIT_OBJECT_BLOB)
return dupObject<Object>(obj);
else
return peelObject<Object>(obj, GIT_OBJECT_TREE);
}
struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl> struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
{ {
/** Location of the repository on disk. */ /** Location of the repository on disk. */
@ -166,7 +189,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
std::unordered_set<git_oid> done; std::unordered_set<git_oid> done;
std::queue<Commit> todo; std::queue<Commit> todo;
todo.push(peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT)); todo.push(peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
while (auto commit = pop(todo)) { while (auto commit = pop(todo)) {
if (!done.insert(*git_commit_id(commit->get())).second) continue; if (!done.insert(*git_commit_id(commit->get())).second) continue;
@ -184,7 +207,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
uint64_t getLastModified(const Hash & rev) override uint64_t getLastModified(const Hash & rev) override
{ {
auto commit = peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT); auto commit = peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
return git_commit_time(commit.get()); return git_commit_time(commit.get());
} }
@ -476,11 +499,11 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
struct GitSourceAccessor : SourceAccessor struct GitSourceAccessor : SourceAccessor
{ {
ref<GitRepoImpl> repo; ref<GitRepoImpl> repo;
Tree root; Object root;
GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev) GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev)
: repo(repo_) : repo(repo_)
, root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE)) , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
{ {
} }
@ -506,7 +529,7 @@ struct GitSourceAccessor : SourceAccessor
std::optional<Stat> maybeLstat(const CanonPath & path) override std::optional<Stat> maybeLstat(const CanonPath & path) override
{ {
if (path.isRoot()) if (path.isRoot())
return Stat { .type = tDirectory }; return Stat { .type = git_object_type(root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular };
auto entry = lookup(path); auto entry = lookup(path);
if (!entry) if (!entry)
@ -616,10 +639,10 @@ struct GitSourceAccessor : SourceAccessor
std::optional<Tree> lookupTree(const CanonPath & path) std::optional<Tree> lookupTree(const CanonPath & path)
{ {
if (path.isRoot()) { if (path.isRoot()) {
Tree tree; if (git_object_type(root.get()) == GIT_OBJECT_TREE)
if (git_tree_dup(Setter(tree), root.get())) return dupObject<Tree>((git_tree *) &*root);
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message); else
return tree; return std::nullopt;
} }
auto entry = lookup(path); auto entry = lookup(path);
@ -646,10 +669,10 @@ struct GitSourceAccessor : SourceAccessor
std::variant<Tree, Submodule> getTree(const CanonPath & path) std::variant<Tree, Submodule> getTree(const CanonPath & path)
{ {
if (path.isRoot()) { if (path.isRoot()) {
Tree tree; if (git_object_type(root.get()) == GIT_OBJECT_TREE)
if (git_tree_dup(Setter(tree), root.get())) return dupObject<Tree>((git_tree *) &*root);
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message); else
return tree; throw Error("Git root object '%s' is not a directory", *git_object_id(root.get()));
} }
auto entry = need(path); auto entry = need(path);
@ -669,6 +692,9 @@ struct GitSourceAccessor : SourceAccessor
Blob getBlob(const CanonPath & path, bool expectSymlink) Blob getBlob(const CanonPath & path, bool expectSymlink)
{ {
if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB)
return dupObject<Blob>((git_blob *) &*root);
auto notExpected = [&]() auto notExpected = [&]()
{ {
throw Error( throw Error(
@ -782,8 +808,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
std::vector<PendingDir> pendingDirs; std::vector<PendingDir> pendingDirs;
size_t componentsToStrip = 1;
void pushBuilder(std::string name) void pushBuilder(std::string name)
{ {
git_treebuilder * b; git_treebuilder * b;
@ -839,9 +863,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
{ {
std::span<const std::string> pathComponents2{pathComponents}; std::span<const std::string> pathComponents2{pathComponents};
if (pathComponents2.size() <= componentsToStrip) return false;
pathComponents2 = pathComponents2.subspan(componentsToStrip);
updateBuilders( updateBuilders(
isDir isDir
? pathComponents2 ? pathComponents2
@ -964,11 +985,27 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
git_tree_entry_filemode(entry)); git_tree_entry_filemode(entry));
} }
Hash sync() override { Hash sync() override
{
updateBuilders({}); updateBuilders({});
auto [oid, _name] = popBuilder(); auto [oid, _name] = popBuilder();
/* If the root directory contains a single entry that is a
directory or a non-executable regular file, return that as
the top-level object. We don't do this for executables
because they don't have a tree hash in the Git object
model. */
auto _tree = lookupObject(*repo, oid, GIT_OBJECT_TREE);
auto tree = (const git_tree *) &*_tree;
if (git_tree_entrycount(tree) == 1) {
auto entry = git_tree_entry_byindex(tree, 0);
auto mode = git_tree_entry_filemode(entry);
if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_TREE)
oid = *git_tree_entry_id(entry);
}
return toHash(oid); return toHash(oid);
} }
}; };

View File

@ -83,3 +83,28 @@ path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .s
[[ $(cat "$path/a/zzz") = bar ]] [[ $(cat "$path/a/zzz") = bar ]]
[[ $(cat "$path/c/aap") = bar ]] [[ $(cat "$path/c/aap") = bar ]]
[[ $(cat "$path/fnord") = bar ]] [[ $(cat "$path/fnord") = bar ]]
# Test a tarball that has multiple top-level directories.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root" "$TEST_ROOT/tar_root/foo" "$TEST_ROOT/tar_root/bar"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ -d "$path/foo" ]]
[[ -d "$path/bar" ]]
# Test a tarball that has a single non-executable regular file.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root"
echo bar > "$TEST_ROOT/tar_root/foo"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ $(cat "$path") = bar ]]
# Test a tarball that has a single executable regular file.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root"
echo bar > "$TEST_ROOT/tar_root/foo"
chmod +x "$TEST_ROOT/tar_root/foo"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ $(cat "$path/foo") = bar ]]