Avoid creating temporary store object for git over the wire

Instead, serialize as NAR and send that over, then rehash sever side.
This is alorithmically simpler, but comes at the cost of a newer
parameter to `Store::addToStoreFromDump`.

Co-authored-by: Eelco Dolstra <edolstra@gmail.com>
This commit is contained in:
John Ericson 2024-01-18 23:57:26 -05:00
parent 201551c937
commit d4ad1fcf30
16 changed files with 137 additions and 138 deletions

View File

@ -2092,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
})
: ({
StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
});
/* Note: we don't need to add `context' to the context of the

View File

@ -305,7 +305,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
StorePath BinaryCacheStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
@ -313,17 +314,26 @@ StorePath BinaryCacheStore::addToStoreFromDump(
std::optional<Hash> caHash;
std::string nar;
// Calculating Git hash from NAR stream not yet implemented. May not
// be possible to implement in single-pass if the NAR is in an
// inconvenient order. Could fetch after uploading, however.
if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git)
unsupported("addToStoreFromDump");
if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) {
auto & dump2 = *dump2p;
// Hack, this gives us a "replayable" source so we can compute
// multiple hashes more easily.
//
// Only calculate if the dump is in the right format, however.
if (static_cast<FileIngestionMethod>(dumpMethod) == hashMethod.getFileIngestionMethod())
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Recursive:
switch (dumpMethod) {
case FileSerialisationMethod::Recursive:
// The dump is already NAR in this case, just use it.
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
@ -332,14 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s);
break;
}
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else {
// Otherwise, we have to do th same hashing as NAR so our single
// hash will suffice for both purposes.
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
unsupported("addToStoreFromDump");
}
StringSource narDump { nar };
@ -354,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
*this,
name,
ContentAddressWithReferences::fromParts(
method,
hashMethod,
caHash ? *caHash : nar.first,
{
.others = references,

View File

@ -125,7 +125,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;

View File

@ -1312,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override
{
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair);
auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair);
goal.addDependency(path);
return path;
}

View File

@ -401,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto pathInfo = [&]() {
// NB: FramedSource must be out of scope before logger->stopWork();
auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr);
auto hashAlgo = hashAlgo_; // work around clang bug
auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr);
FramedSource source(from);
FileSerialisationMethod dumpMethod;
switch (contentAddressMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
dumpMethod = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
dumpMethod = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
dumpMethod = FileSerialisationMethod::Recursive;
break;
}
// TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store.
auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair);
auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair);
return store->queryPathInfo(path);
}();
logger->stopWork();
@ -431,8 +443,8 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
hashAlgo = parseHashAlgo(hashAlgoRaw);
}
// Old protocol always sends NAR, regardless of hashing method
auto dumpSource = sinkToSource([&](Sink & saved) {
if (method == FileIngestionMethod::Recursive) {
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
@ -444,21 +456,10 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
});
logger->startWork();
auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo);
auto path = store->addToStoreFromDump(
*dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo);
logger->stopWork();
to << store->printStorePath(path);
@ -490,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto path = ({
StringSource source { s };
store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
});
logger->stopWork();
to << store->printStorePath(path);

View File

@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store,
})
: ({
StringSource s { contents };
store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
});
}

View File

@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store
virtual StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override

View File

@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor
virtual StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override

View File

@ -1148,7 +1148,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
StorePath LocalStore::addToStoreFromDump(
Source & source0,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
@ -1201,7 +1202,13 @@ StorePath LocalStore::addToStoreFromDump(
Path tempDir;
AutoCloseFD tempDirFd;
if (!inMemory) {
bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod;
/* If the methods don't match, our streaming hash of the dump is the
wrong sort, and we need to rehash. */
bool inMemoryAndDontNeedRestore = inMemory && methodsMatch;
if (!inMemoryAndDontNeedRestore) {
/* Drain what we pulled so far, and then keep on pulling */
StringSource dumpSource { dump };
ChainSource bothSource { dumpSource, source };
@ -1210,40 +1217,23 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
restorePath(tempPath, bothSource, dumpMethod);
dumpBuffer.reset();
dump = {};
}
auto [hash, size] = hashSink->finish();
auto [dumpHash, size] = hashSink->finish();
PosixSourceAccessor accessor;
auto desc = ContentAddressWithReferences::fromParts(
method,
hash,
hashMethod,
methodsMatch
? dumpHash
: hashPath(
accessor, CanonPath { tempPath },
hashMethod.getFileIngestionMethod(), hashAlgo),
{
.others = references,
// caller is not capable of creating a self-reference, because this is content-addressed without modulus
@ -1269,32 +1259,19 @@ StorePath LocalStore::addToStoreFromDump(
autoGC();
if (inMemory) {
if (inMemoryAndDontNeedRestore) {
StringSource dumpSource { dump };
/* Restore from the buffer in memory. */
auto fim = method.getFileIngestionMethod();
auto fim = hashMethod.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
case FileIngestionMethod::Git:
// doesn't correspond to serialization method, so
// this should be unreachable
assert(false);
}
} else {
/* Move the temporary path we restored above. */
@ -1303,8 +1280,8 @@ StorePath LocalStore::addToStoreFromDump(
/* For computing the nar hash. In recursive SHA-256 mode, this
is the same as the store hash, so no need to do it again. */
auto narHash = std::pair { hash, size };
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) {
auto narHash = std::pair { dumpHash, size };
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) {
HashSink narSink { HashAlgorithm::SHA256 };
dumpPath(realPath, narSink);
narHash = narSink.finish();

View File

@ -180,7 +180,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;

View File

@ -509,12 +509,28 @@ ref<const ValidPathInfo> RemoteStore::addCAToStore(
StorePath RemoteStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
{
return addCAToStore(dump, name, method, hashAlgo, references, repair)->path;
FileSerialisationMethod fsm;
switch (hashMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
fsm = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
fsm = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
fsm = FileSerialisationMethod::Recursive;
break;
}
if (fsm != dumpMethod)
unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination");
return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path;
}

View File

@ -87,7 +87,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override;

View File

@ -197,40 +197,23 @@ StorePath Store::addToStore(
PathFilter & filter,
RepairFlag repair)
{
auto source = sinkToSource([&](Sink & sink) {
auto fim = method.getFileIngestionMethod();
switch (fim) {
FileSerialisationMethod fsm;
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
fsm = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
{
dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter);
fsm = FileSerialisationMethod::Recursive;
break;
}
case FileIngestionMethod::Git:
{
git::dump(
accessor, path,
sink,
// recursively add to store if path is a directory
[&](const CanonPath & path) -> git::TreeEntry {
auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair);
auto info = queryPathInfo(storePath);
assert(info->ca);
assert(info->ca->method == FileIngestionMethod::Git);
auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath)));
auto gitModeOpt = git::convertMode(stat.type);
assert(gitModeOpt);
return {
.mode = *gitModeOpt,
.hash = info->ca->hash,
};
},
filter);
// Use NAR; Git is not a serialization method
fsm = FileSerialisationMethod::Recursive;
break;
}
}
auto source = sinkToSource([&](Sink & sink) {
dumpPath(accessor, path, sink, fsm, filter);
});
return addToStoreFromDump(*source, name, method, hashAlgo, references, repair);
return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair);
}
void Store::addMultipleToStore(

View File

@ -466,14 +466,23 @@ public:
* in `dump`, which is either a NAR serialisation (if recursive ==
* true) or simply the contents of a regular file (if recursive ==
* false).
* `dump` may be drained
*
* \todo remove?
* `dump` may be drained.
*
* @param dumpMethod What serialisation format is `dump`, i.e. how
* to deserialize it. Must either match hashMethod or be
* `FileSerialisationMethod::Recursive`.
*
* @param hashMethod How content addressing? Need not match be the
* same as `dumpMethod`.
*
* @todo remove?
*/
virtual StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) = 0;
@ -772,7 +781,7 @@ protected:
* Helper for methods that are not unsupported: this is used for
* default definitions for virtual methods that are meant to be overriden.
*
* \todo Using this should be a last resort. It is better to make
* @todo Using this should be a last resort. It is better to make
* the method "virtual pure" and/or move it to a subclass.
*/
[[noreturn]] void unsupported(const std::string & op)

View File

@ -113,7 +113,7 @@ bool createUserEnv(EvalState & state, PackageInfos & elems,
std::string str2 = str.str();
StringSource source { str2 };
state.store->addToStoreFromDump(
source, "env-manifest.nix", TextIngestionMethod {}, HashAlgorithm::SHA256, references);
source, "env-manifest.nix", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references);
});
/* Get the environment builder expression. */

View File

@ -226,7 +226,7 @@ static StorePath getDerivationEnvironment(ref<Store> store, ref<Store> evalStore
auto getEnvShPath = ({
StringSource source { getEnvSh };
evalStore->addToStoreFromDump(
source, "get-env.sh", TextIngestionMethod {}, HashAlgorithm::SHA256, {});
source, "get-env.sh", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, {});
});
drv.args = {store->printStorePath(getEnvShPath)};