From 543c06322ca7675680daf580168285584ed75d9d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 7 Jun 2023 14:26:30 +0200 Subject: [PATCH] Allow tarball URLs to redirect to a lockable immutable URL Previously, for tarball flakes, we recorded the original URL of the tarball flake, rather than the URL to which it ultimately redirects. Thus, a flake URL like http://example.org/patchelf-latest.tar that redirects to http://example.org/patchelf-.tar was not really usable. We couldn't record the redirected URL, because sites like GitHub redirect to CDN URLs that we can't rely on to be stable. So now we use the redirected URL only if the server returns the `x-nix-is-immutable` or `x-amz-meta-nix-is-immutable` headers in its response. (cherry picked from commit 1ad3328c5efea041990fa82e6ad24ae2b4e81c24) (cherry picked from commit dc718e28c98ef5cca9db3e0517b43f679789b82a) --- src/libcmd/common-eval-args.cc | 2 +- src/libexpr/parser.y | 2 +- src/libexpr/primops/fetchTree.cc | 2 +- src/libfetchers/attrs.hh | 1 + src/libfetchers/fetchers.hh | 10 ++++- src/libfetchers/github.cc | 10 ++--- src/libfetchers/tarball.cc | 68 ++++++++++++++++++++++++-------- src/libstore/filetransfer.cc | 22 +++++++++-- src/libstore/filetransfer.hh | 4 ++ 9 files changed, 91 insertions(+), 30 deletions(-) diff --git a/src/libcmd/common-eval-args.cc b/src/libcmd/common-eval-args.cc index 5b6477c82..181a56f28 100644 --- a/src/libcmd/common-eval-args.cc +++ b/src/libcmd/common-eval-args.cc @@ -165,7 +165,7 @@ Path lookupFileArg(EvalState & state, std::string_view s) { if (EvalSettings::isPseudoUrl(s)) { auto storePath = fetchers::downloadTarball( - state.store, EvalSettings::resolvePseudoUrl(s), "source", false).first.storePath; + state.store, EvalSettings::resolvePseudoUrl(s), "source", false).tree.storePath; return state.store->toRealPath(storePath); } diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 97e615c37..68b39efa8 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -805,7 +805,7 @@ std::pair EvalState::resolveSearchPathElem(const SearchPathEl if (EvalSettings::isPseudoUrl(elem.second)) { try { auto storePath = fetchers::downloadTarball( - store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).first.storePath; + store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).tree.storePath; res = { true, store->toRealPath(storePath) }; } catch (FileTransferError & e) { logWarning({ diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 0d0e00fa5..7123b637a 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -258,7 +258,7 @@ static void fetch(EvalState & state, const PosIdx pos, Value * * args, Value & v // https://github.com/NixOS/nix/issues/4313 auto storePath = unpack - ? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).first.storePath + ? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).tree.storePath : fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath; if (expectedHash) { diff --git a/src/libfetchers/attrs.hh b/src/libfetchers/attrs.hh index 1a14bb023..9f885a793 100644 --- a/src/libfetchers/attrs.hh +++ b/src/libfetchers/attrs.hh @@ -2,6 +2,7 @@ ///@file #include "types.hh" +#include "hash.hh" #include diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 498ad7e4d..d0738f619 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -158,6 +158,7 @@ struct DownloadFileResult StorePath storePath; std::string etag; std::string effectiveUrl; + std::optional immutableUrl; }; DownloadFileResult downloadFile( @@ -167,7 +168,14 @@ DownloadFileResult downloadFile( bool locked, const Headers & headers = {}); -std::pair downloadTarball( +struct DownloadTarballResult +{ + Tree tree; + time_t lastModified; + std::optional immutableUrl; +}; + +DownloadTarballResult downloadTarball( ref store, const std::string & url, const std::string & name, diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 1ed09d30d..07be6c9b3 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -207,21 +207,21 @@ struct GitArchiveInputScheme : InputScheme auto url = getDownloadUrl(input); - auto [tree, lastModified] = downloadTarball(store, url.url, input.getName(), true, url.headers); + auto result = downloadTarball(store, url.url, input.getName(), true, url.headers); - input.attrs.insert_or_assign("lastModified", uint64_t(lastModified)); + input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified)); getCache()->add( store, lockedAttrs, { {"rev", rev->gitRev()}, - {"lastModified", uint64_t(lastModified)} + {"lastModified", uint64_t(result.lastModified)} }, - tree.storePath, + result.tree.storePath, true); - return {std::move(tree.storePath), input}; + return {result.tree.storePath, input}; } }; diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index e9686262a..c2bf23332 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -32,7 +32,8 @@ DownloadFileResult downloadFile( return { .storePath = std::move(cached->storePath), .etag = getStrAttr(cached->infoAttrs, "etag"), - .effectiveUrl = getStrAttr(cached->infoAttrs, "url") + .effectiveUrl = getStrAttr(cached->infoAttrs, "url"), + .immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"), }; }; @@ -55,12 +56,14 @@ DownloadFileResult downloadFile( } // FIXME: write to temporary file. - Attrs infoAttrs({ {"etag", res.etag}, {"url", res.effectiveUri}, }); + if (res.immutableUrl) + infoAttrs.emplace("immutableUrl", *res.immutableUrl); + std::optional storePath; if (res.cached) { @@ -107,10 +110,11 @@ DownloadFileResult downloadFile( .storePath = std::move(*storePath), .etag = res.etag, .effectiveUrl = res.effectiveUri, + .immutableUrl = res.immutableUrl, }; } -std::pair downloadTarball( +DownloadTarballResult downloadTarball( ref store, const std::string & url, const std::string & name, @@ -127,8 +131,9 @@ std::pair downloadTarball( if (cached && !cached->expired) return { - Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) }, - getIntAttr(cached->infoAttrs, "lastModified") + .tree = Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) }, + .lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"), + .immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"), }; auto res = downloadFile(store, url, name, locked, headers); @@ -156,6 +161,9 @@ std::pair downloadTarball( {"etag", res.etag}, }); + if (res.immutableUrl) + infoAttrs.emplace("immutableUrl", *res.immutableUrl); + getCache()->add( store, inAttrs, @@ -164,8 +172,9 @@ std::pair downloadTarball( locked); return { - Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) }, - lastModified, + .tree = Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) }, + .lastModified = lastModified, + .immutableUrl = res.immutableUrl, }; } @@ -185,21 +194,33 @@ struct CurlInputScheme : InputScheme virtual bool isValidURL(const ParsedURL & url) const = 0; - std::optional inputFromURL(const ParsedURL & url) const override + std::optional inputFromURL(const ParsedURL & _url) const override { - if (!isValidURL(url)) + if (!isValidURL(_url)) return std::nullopt; Input input; - auto urlWithoutApplicationScheme = url; - urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport; + auto url = _url; + + url.scheme = parseUrlScheme(url.scheme).transport; - input.attrs.insert_or_assign("type", inputType()); - input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string()); auto narHash = url.query.find("narHash"); if (narHash != url.query.end()) input.attrs.insert_or_assign("narHash", narHash->second); + + if (auto i = get(url.query, "rev")) + input.attrs.insert_or_assign("rev", *i); + + if (auto i = get(url.query, "revCount")) + if (auto n = string2Int(*i)) + input.attrs.insert_or_assign("revCount", *n); + + url.query.erase("rev"); + url.query.erase("revCount"); + + input.attrs.insert_or_assign("type", inputType()); + input.attrs.insert_or_assign("url", url.to_string()); return input; } @@ -208,7 +229,8 @@ struct CurlInputScheme : InputScheme auto type = maybeGetStrAttr(attrs, "type"); if (type != inputType()) return {}; - std::set allowedNames = {"type", "url", "narHash", "name", "unpack"}; + // FIXME: some of these only apply to TarballInputScheme. + std::set allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) throw Error("unsupported %s input attribute '%s'", *type, name); @@ -271,10 +293,22 @@ struct TarballInputScheme : CurlInputScheme : hasTarballExtension(url.path)); } - std::pair fetch(ref store, const Input & input) override + std::pair fetch(ref store, const Input & _input) override { - auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first; - return {std::move(tree.storePath), input}; + Input input(_input); + auto url = getStrAttr(input.attrs, "url"); + auto result = downloadTarball(store, url, input.getName(), false); + + if (result.immutableUrl) { + auto immutableInput = Input::fromURL(*result.immutableUrl); + // FIXME: would be nice to support arbitrary flakerefs + // here, e.g. git flakes. + if (immutableInput.getType() != "tarball") + throw Error("tarball 'Link' headers that redirect to non-tarball URLs are not supported"); + input = immutableInput; + } + + return {result.tree.storePath, std::move(input)}; } }; diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 2346accbe..38b691279 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -186,9 +186,9 @@ struct curlFileTransfer : public FileTransfer size_t realSize = size * nmemb; std::string line((char *) contents, realSize); printMsg(lvlVomit, "got header for '%s': %s", request.uri, trim(line)); + static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase); - std::smatch match; - if (std::regex_match(line, match, statusLine)) { + if (std::smatch match; std::regex_match(line, match, statusLine)) { result.etag = ""; result.data.clear(); result.bodySize = 0; @@ -196,9 +196,11 @@ struct curlFileTransfer : public FileTransfer acceptRanges = false; encoding = ""; } else { + auto i = line.find(':'); if (i != std::string::npos) { std::string name = toLower(trim(line.substr(0, i))); + if (name == "etag") { result.etag = trim(line.substr(i + 1)); /* Hack to work around a GitHub bug: it sends @@ -212,10 +214,22 @@ struct curlFileTransfer : public FileTransfer debug("shutting down on 200 HTTP response with expected ETag"); return 0; } - } else if (name == "content-encoding") + } + + else if (name == "content-encoding") encoding = trim(line.substr(i + 1)); + else if (name == "accept-ranges" && toLower(trim(line.substr(i + 1))) == "bytes") acceptRanges = true; + + else if (name == "link" || name == "x-amz-meta-link") { + auto value = trim(line.substr(i + 1)); + static std::regex linkRegex("<([^>]*)>; rel=\"immutable\"", std::regex::extended | std::regex::icase); + if (std::smatch match; std::regex_match(value, match, linkRegex)) + result.immutableUrl = match.str(1); + else + debug("got invalid link header '%s'", value); + } } } return realSize; @@ -345,7 +359,7 @@ struct curlFileTransfer : public FileTransfer { auto httpStatus = getHTTPStatus(); - char * effectiveUriCStr; + char * effectiveUriCStr = nullptr; curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); if (effectiveUriCStr) result.effectiveUri = effectiveUriCStr; diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 378c6ff78..a3b0dde1f 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -80,6 +80,10 @@ struct FileTransferResult std::string effectiveUri; std::string data; uint64_t bodySize = 0; + /* An "immutable" URL for this resource (i.e. one whose contents + will never change), as returned by the `Link: ; + rel="immutable"` header. */ + std::optional immutableUrl; }; class Store;