Allow tarball URLs to redirect to a lockable immutable URL

Previously, for tarball flakes, we recorded the original URL of the
tarball flake, rather than the URL to which it ultimately
redirects. Thus, a flake URL like
http://example.org/patchelf-latest.tar that redirects to
http://example.org/patchelf-<revision>.tar was not really usable. We
couldn't record the redirected URL, because sites like GitHub redirect
to CDN URLs that we can't rely on to be stable.

So now we use the redirected URL only if the server returns the
`x-nix-is-immutable` or `x-amz-meta-nix-is-immutable` headers in its
response.

(cherry picked from commit 1ad3328c5e)
(cherry picked from commit dc718e28c9)
This commit is contained in:
Eelco Dolstra 2023-06-07 14:26:30 +02:00
parent 375bd8012a
commit 543c06322c
9 changed files with 91 additions and 30 deletions

View File

@ -165,7 +165,7 @@ Path lookupFileArg(EvalState & state, std::string_view s)
{
if (EvalSettings::isPseudoUrl(s)) {
auto storePath = fetchers::downloadTarball(
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).first.storePath;
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).tree.storePath;
return state.store->toRealPath(storePath);
}

View File

@ -805,7 +805,7 @@ std::pair<bool, std::string> EvalState::resolveSearchPathElem(const SearchPathEl
if (EvalSettings::isPseudoUrl(elem.second)) {
try {
auto storePath = fetchers::downloadTarball(
store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).first.storePath;
store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).tree.storePath;
res = { true, store->toRealPath(storePath) };
} catch (FileTransferError & e) {
logWarning({

View File

@ -258,7 +258,7 @@ static void fetch(EvalState & state, const PosIdx pos, Value * * args, Value & v
// https://github.com/NixOS/nix/issues/4313
auto storePath =
unpack
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).first.storePath
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).tree.storePath
: fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath;
if (expectedHash) {

View File

@ -2,6 +2,7 @@
///@file
#include "types.hh"
#include "hash.hh"
#include <variant>

View File

@ -158,6 +158,7 @@ struct DownloadFileResult
StorePath storePath;
std::string etag;
std::string effectiveUrl;
std::optional<std::string> immutableUrl;
};
DownloadFileResult downloadFile(
@ -167,7 +168,14 @@ DownloadFileResult downloadFile(
bool locked,
const Headers & headers = {});
std::pair<Tree, time_t> downloadTarball(
struct DownloadTarballResult
{
Tree tree;
time_t lastModified;
std::optional<std::string> immutableUrl;
};
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,

View File

@ -207,21 +207,21 @@ struct GitArchiveInputScheme : InputScheme
auto url = getDownloadUrl(input);
auto [tree, lastModified] = downloadTarball(store, url.url, input.getName(), true, url.headers);
auto result = downloadTarball(store, url.url, input.getName(), true, url.headers);
input.attrs.insert_or_assign("lastModified", uint64_t(lastModified));
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));
getCache()->add(
store,
lockedAttrs,
{
{"rev", rev->gitRev()},
{"lastModified", uint64_t(lastModified)}
{"lastModified", uint64_t(result.lastModified)}
},
tree.storePath,
result.tree.storePath,
true);
return {std::move(tree.storePath), input};
return {result.tree.storePath, input};
}
};

View File

@ -32,7 +32,8 @@ DownloadFileResult downloadFile(
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url")
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
};
@ -55,12 +56,14 @@ DownloadFileResult downloadFile(
}
// FIXME: write to temporary file.
Attrs infoAttrs({
{"etag", res.etag},
{"url", res.effectiveUri},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
std::optional<StorePath> storePath;
if (res.cached) {
@ -107,10 +110,11 @@ DownloadFileResult downloadFile(
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = res.effectiveUri,
.immutableUrl = res.immutableUrl,
};
}
std::pair<Tree, time_t> downloadTarball(
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,
@ -127,8 +131,9 @@ std::pair<Tree, time_t> downloadTarball(
if (cached && !cached->expired)
return {
Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) },
getIntAttr(cached->infoAttrs, "lastModified")
.tree = Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) },
.lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
auto res = downloadFile(store, url, name, locked, headers);
@ -156,6 +161,9 @@ std::pair<Tree, time_t> downloadTarball(
{"etag", res.etag},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
getCache()->add(
store,
inAttrs,
@ -164,8 +172,9 @@ std::pair<Tree, time_t> downloadTarball(
locked);
return {
Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) },
lastModified,
.tree = Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) },
.lastModified = lastModified,
.immutableUrl = res.immutableUrl,
};
}
@ -185,21 +194,33 @@ struct CurlInputScheme : InputScheme
virtual bool isValidURL(const ParsedURL & url) const = 0;
std::optional<Input> inputFromURL(const ParsedURL & url) const override
std::optional<Input> inputFromURL(const ParsedURL & _url) const override
{
if (!isValidURL(url))
if (!isValidURL(_url))
return std::nullopt;
Input input;
auto urlWithoutApplicationScheme = url;
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;
auto url = _url;
url.scheme = parseUrlScheme(url.scheme).transport;
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
if (auto i = get(url.query, "rev"))
input.attrs.insert_or_assign("rev", *i);
if (auto i = get(url.query, "revCount"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);
url.query.erase("rev");
url.query.erase("revCount");
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", url.to_string());
return input;
}
@ -208,7 +229,8 @@ struct CurlInputScheme : InputScheme
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
// FIXME: some of these only apply to TarballInputScheme.
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount"};
for (auto & [name, value] : attrs)
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name);
@ -271,10 +293,22 @@ struct TarballInputScheme : CurlInputScheme
: hasTarballExtension(url.path));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
return {std::move(tree.storePath), input};
Input input(_input);
auto url = getStrAttr(input.attrs, "url");
auto result = downloadTarball(store, url, input.getName(), false);
if (result.immutableUrl) {
auto immutableInput = Input::fromURL(*result.immutableUrl);
// FIXME: would be nice to support arbitrary flakerefs
// here, e.g. git flakes.
if (immutableInput.getType() != "tarball")
throw Error("tarball 'Link' headers that redirect to non-tarball URLs are not supported");
input = immutableInput;
}
return {result.tree.storePath, std::move(input)};
}
};

View File

@ -186,9 +186,9 @@ struct curlFileTransfer : public FileTransfer
size_t realSize = size * nmemb;
std::string line((char *) contents, realSize);
printMsg(lvlVomit, "got header for '%s': %s", request.uri, trim(line));
static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase);
std::smatch match;
if (std::regex_match(line, match, statusLine)) {
if (std::smatch match; std::regex_match(line, match, statusLine)) {
result.etag = "";
result.data.clear();
result.bodySize = 0;
@ -196,9 +196,11 @@ struct curlFileTransfer : public FileTransfer
acceptRanges = false;
encoding = "";
} else {
auto i = line.find(':');
if (i != std::string::npos) {
std::string name = toLower(trim(line.substr(0, i)));
if (name == "etag") {
result.etag = trim(line.substr(i + 1));
/* Hack to work around a GitHub bug: it sends
@ -212,10 +214,22 @@ struct curlFileTransfer : public FileTransfer
debug("shutting down on 200 HTTP response with expected ETag");
return 0;
}
} else if (name == "content-encoding")
}
else if (name == "content-encoding")
encoding = trim(line.substr(i + 1));
else if (name == "accept-ranges" && toLower(trim(line.substr(i + 1))) == "bytes")
acceptRanges = true;
else if (name == "link" || name == "x-amz-meta-link") {
auto value = trim(line.substr(i + 1));
static std::regex linkRegex("<([^>]*)>; rel=\"immutable\"", std::regex::extended | std::regex::icase);
if (std::smatch match; std::regex_match(value, match, linkRegex))
result.immutableUrl = match.str(1);
else
debug("got invalid link header '%s'", value);
}
}
}
return realSize;
@ -345,7 +359,7 @@ struct curlFileTransfer : public FileTransfer
{
auto httpStatus = getHTTPStatus();
char * effectiveUriCStr;
char * effectiveUriCStr = nullptr;
curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr);
if (effectiveUriCStr)
result.effectiveUri = effectiveUriCStr;

View File

@ -80,6 +80,10 @@ struct FileTransferResult
std::string effectiveUri;
std::string data;
uint64_t bodySize = 0;
/* An "immutable" URL for this resource (i.e. one whose contents
will never change), as returned by the `Link: <url>;
rel="immutable"` header. */
std::optional<std::string> immutableUrl;
};
class Store;