Allow tarball URLs to redirect to a lockable immutable URL

Previously, for tarball flakes, we recorded the original URL of the
tarball flake, rather than the URL to which it ultimately
redirects. Thus, a flake URL like
http://example.org/patchelf-latest.tar that redirects to
http://example.org/patchelf-<revision>.tar was not really usable. We
couldn't record the redirected URL, because sites like GitHub redirect
to CDN URLs that we can't rely on to be stable.

So now we use the redirected URL only if the server returns the
`x-nix-is-immutable` or `x-amz-meta-nix-is-immutable` headers in its
response.
This commit is contained in:
Eelco Dolstra 2023-06-07 14:26:30 +02:00
parent 3402b650cd
commit 1ad3328c5e
11 changed files with 177 additions and 30 deletions

View file

@ -590,6 +590,8 @@
tests.sourcehutFlakes = runNixOSTestFor "x86_64-linux" ./tests/nixos/sourcehut-flakes.nix;
tests.tarballFlakes = runNixOSTestFor "x86_64-linux" ./tests/nixos/tarball-flakes.nix;
tests.containers = runNixOSTestFor "x86_64-linux" ./tests/nixos/containers/containers.nix;
tests.setuid = lib.genAttrs

View file

@ -165,7 +165,7 @@ SourcePath lookupFileArg(EvalState & state, std::string_view s)
{
if (EvalSettings::isPseudoUrl(s)) {
auto storePath = fetchers::downloadTarball(
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).first.storePath;
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).tree.storePath;
return state.rootPath(CanonPath(state.store->toRealPath(storePath)));
}

View file

@ -793,7 +793,7 @@ std::pair<bool, std::string> EvalState::resolveSearchPathElem(const SearchPathEl
if (EvalSettings::isPseudoUrl(elem.second)) {
try {
auto storePath = fetchers::downloadTarball(
store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).first.storePath;
store, EvalSettings::resolvePseudoUrl(elem.second), "source", false).tree.storePath;
res = { true, store->toRealPath(storePath) };
} catch (FileTransferError & e) {
logWarning({

View file

@ -262,7 +262,7 @@ static void fetch(EvalState & state, const PosIdx pos, Value * * args, Value & v
// https://github.com/NixOS/nix/issues/4313
auto storePath =
unpack
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).first.storePath
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).tree.storePath
: fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath;
if (expectedHash) {

View file

@ -2,6 +2,7 @@
///@file
#include "types.hh"
#include "hash.hh"
#include <variant>

View file

@ -158,6 +158,7 @@ struct DownloadFileResult
StorePath storePath;
std::string etag;
std::string effectiveUrl;
std::optional<std::string> immutableUrl;
};
DownloadFileResult downloadFile(
@ -167,7 +168,14 @@ DownloadFileResult downloadFile(
bool locked,
const Headers & headers = {});
std::pair<Tree, time_t> downloadTarball(
struct DownloadTarballResult
{
Tree tree;
time_t lastModified;
std::optional<std::string> immutableUrl;
};
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,

View file

@ -207,21 +207,21 @@ struct GitArchiveInputScheme : InputScheme
auto url = getDownloadUrl(input);
auto [tree, lastModified] = downloadTarball(store, url.url, input.getName(), true, url.headers);
auto result = downloadTarball(store, url.url, input.getName(), true, url.headers);
input.attrs.insert_or_assign("lastModified", uint64_t(lastModified));
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));
getCache()->add(
store,
lockedAttrs,
{
{"rev", rev->gitRev()},
{"lastModified", uint64_t(lastModified)}
{"lastModified", uint64_t(result.lastModified)}
},
tree.storePath,
result.tree.storePath,
true);
return {std::move(tree.storePath), input};
return {result.tree.storePath, input};
}
};

View file

@ -32,7 +32,8 @@ DownloadFileResult downloadFile(
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url")
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
};
@ -55,12 +56,14 @@ DownloadFileResult downloadFile(
}
// FIXME: write to temporary file.
Attrs infoAttrs({
{"etag", res.etag},
{"url", res.effectiveUri},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
std::optional<StorePath> storePath;
if (res.cached) {
@ -111,10 +114,11 @@ DownloadFileResult downloadFile(
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = res.effectiveUri,
.immutableUrl = res.immutableUrl,
};
}
std::pair<Tree, time_t> downloadTarball(
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,
@ -131,8 +135,9 @@ std::pair<Tree, time_t> downloadTarball(
if (cached && !cached->expired)
return {
Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) },
getIntAttr(cached->infoAttrs, "lastModified")
.tree = Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) },
.lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
auto res = downloadFile(store, url, name, locked, headers);
@ -160,6 +165,9 @@ std::pair<Tree, time_t> downloadTarball(
{"etag", res.etag},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
getCache()->add(
store,
inAttrs,
@ -168,8 +176,9 @@ std::pair<Tree, time_t> downloadTarball(
locked);
return {
Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) },
lastModified,
.tree = Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) },
.lastModified = lastModified,
.immutableUrl = res.immutableUrl,
};
}
@ -189,21 +198,33 @@ struct CurlInputScheme : InputScheme
virtual bool isValidURL(const ParsedURL & url) const = 0;
std::optional<Input> inputFromURL(const ParsedURL & url) const override
std::optional<Input> inputFromURL(const ParsedURL & _url) const override
{
if (!isValidURL(url))
if (!isValidURL(_url))
return std::nullopt;
Input input;
auto urlWithoutApplicationScheme = url;
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;
auto url = _url;
url.scheme = parseUrlScheme(url.scheme).transport;
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
if (auto i = get(url.query, "rev"))
input.attrs.insert_or_assign("rev", *i);
if (auto i = get(url.query, "revCount"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);
url.query.erase("rev");
url.query.erase("revCount");
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", url.to_string());
return input;
}
@ -212,7 +233,8 @@ struct CurlInputScheme : InputScheme
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
// FIXME: some of these only apply to TarballInputScheme.
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount"};
for (auto & [name, value] : attrs)
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name);
@ -275,10 +297,22 @@ struct TarballInputScheme : CurlInputScheme
: hasTarballExtension(url.path));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
return {std::move(tree.storePath), input};
Input input(_input);
auto url = getStrAttr(input.attrs, "url");
auto result = downloadTarball(store, url, input.getName(), false);
if (result.immutableUrl) {
auto immutableInput = Input::fromURL(*result.immutableUrl);
// FIXME: would be nice to support arbitrary flakerefs
// here, e.g. git flakes.
if (immutableInput.getType() != "tarball")
throw Error("tarball 'Link' headers that redirect to non-tarball URLs are not supported");
input = immutableInput;
}
return {result.tree.storePath, std::move(input)};
}
};

View file

@ -186,9 +186,9 @@ struct curlFileTransfer : public FileTransfer
size_t realSize = size * nmemb;
std::string line((char *) contents, realSize);
printMsg(lvlVomit, "got header for '%s': %s", request.uri, trim(line));
static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase);
std::smatch match;
if (std::regex_match(line, match, statusLine)) {
if (std::smatch match; std::regex_match(line, match, statusLine)) {
result.etag = "";
result.data.clear();
result.bodySize = 0;
@ -196,9 +196,11 @@ struct curlFileTransfer : public FileTransfer
acceptRanges = false;
encoding = "";
} else {
auto i = line.find(':');
if (i != std::string::npos) {
std::string name = toLower(trim(line.substr(0, i)));
if (name == "etag") {
result.etag = trim(line.substr(i + 1));
/* Hack to work around a GitHub bug: it sends
@ -212,10 +214,22 @@ struct curlFileTransfer : public FileTransfer
debug("shutting down on 200 HTTP response with expected ETag");
return 0;
}
} else if (name == "content-encoding")
}
else if (name == "content-encoding")
encoding = trim(line.substr(i + 1));
else if (name == "accept-ranges" && toLower(trim(line.substr(i + 1))) == "bytes")
acceptRanges = true;
else if (name == "link" || name == "x-amz-meta-link") {
auto value = trim(line.substr(i + 1));
static std::regex linkRegex("<([^>]*)>; rel=\"immutable\"", std::regex::extended | std::regex::icase);
if (std::smatch match; std::regex_match(value, match, linkRegex))
result.immutableUrl = match.str(1);
else
debug("got invalid link header '%s'", value);
}
}
}
return realSize;
@ -345,7 +359,7 @@ struct curlFileTransfer : public FileTransfer
{
auto httpStatus = getHTTPStatus();
char * effectiveUriCStr;
char * effectiveUriCStr = nullptr;
curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr);
if (effectiveUriCStr)
result.effectiveUri = effectiveUriCStr;

View file

@ -80,6 +80,10 @@ struct FileTransferResult
std::string effectiveUri;
std::string data;
uint64_t bodySize = 0;
/* An "immutable" URL for this resource (i.e. one whose contents
will never change), as returned by the `Link: <url>;
rel="immutable"` header. */
std::optional<std::string> immutableUrl;
};
class Store;

View file

@ -0,0 +1,84 @@
{ lib, config, nixpkgs, ... }:
let
pkgs = config.nodes.machine.nixpkgs.pkgs;
root = pkgs.runCommand "nixpkgs-flake" {}
''
mkdir -p $out/stable
set -x
dir=nixpkgs-${nixpkgs.shortRev}
cp -prd ${nixpkgs} $dir
# Set the correct timestamp in the tarball.
find $dir -print0 | xargs -0 touch -t ${builtins.substring 0 12 nixpkgs.lastModifiedDate}.${builtins.substring 12 2 nixpkgs.lastModifiedDate} --
tar cfz $out/stable/${nixpkgs.rev}.tar.gz $dir --hard-dereference
echo 'Redirect "/latest.tar.gz" "/stable/${nixpkgs.rev}.tar.gz"' > $out/.htaccess
echo 'Header set Link "<http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234>; rel=\"immutable\""' > $out/stable/.htaccess
'';
in
{
name = "tarball-flakes";
nodes =
{
machine =
{ config, pkgs, ... }:
{ networking.firewall.allowedTCPPorts = [ 80 ];
services.httpd.enable = true;
services.httpd.adminAddr = "foo@example.org";
services.httpd.extraConfig = ''
ErrorLog syslog:local6
'';
services.httpd.virtualHosts."localhost" =
{ servedDirs =
[ { urlPath = "/";
dir = root;
}
];
};
virtualisation.writableStore = true;
virtualisation.diskSize = 2048;
virtualisation.additionalPaths = [ pkgs.hello pkgs.fuse ];
virtualisation.memorySize = 4096;
nix.settings.substituters = lib.mkForce [ ];
nix.extraOptions = "experimental-features = nix-command flakes";
};
};
testScript = { nodes }: ''
# fmt: off
import json
start_all()
machine.wait_for_unit("httpd.service")
out = machine.succeed("nix flake metadata --json http://localhost/latest.tar.gz")
print(out)
info = json.loads(out)
# Check that we got redirected to the immutable URL.
assert info["locked"]["url"] == "http://localhost/stable/${nixpkgs.rev}.tar.gz"
# Check that we got the rev and revCount attributes.
assert info["revision"] == "${nixpkgs.rev}"
assert info["revCount"] == 1234
# Check that fetching with rev/revCount/narHash succeeds.
machine.succeed("nix flake metadata --json http://localhost/latest.tar.gz?rev=" + info["revision"])
machine.succeed("nix flake metadata --json http://localhost/latest.tar.gz?revCount=" + str(info["revCount"]))
machine.succeed("nix flake metadata --json http://localhost/latest.tar.gz?narHash=" + info["locked"]["narHash"])
# Check that fetching fails if we provide incorrect attributes.
machine.fail("nix flake metadata --json http://localhost/latest.tar.gz?rev=493300eb13ae6fb387fbd47bf54a85915acc31c0")
machine.fail("nix flake metadata --json http://localhost/latest.tar.gz?revCount=789")
machine.fail("nix flake metadata --json http://localhost/latest.tar.gz?narHash=sha256-tbudgBSg+bHWHiHnlteNzN8TUvI80ygS9IULh4rklEw=")
'';
}