mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2025-01-19 01:26:47 +02:00
Tarball fetcher: Use the content-addressed Git cache
Backported from the lazy-trees branch.
This commit is contained in:
parent
930b9c8269
commit
cabee98152
9 changed files with 185 additions and 95 deletions
|
@ -9,6 +9,7 @@
|
|||
#include "store-api.hh"
|
||||
#include "command.hh"
|
||||
#include "tarball.hh"
|
||||
#include "fetch-to-store.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
@ -167,8 +168,9 @@ Bindings * MixEvalArgs::getAutoArgs(EvalState & state)
|
|||
SourcePath lookupFileArg(EvalState & state, std::string_view s, const Path * baseDir)
|
||||
{
|
||||
if (EvalSettings::isPseudoUrl(s)) {
|
||||
auto storePath = fetchers::downloadTarball(
|
||||
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).storePath;
|
||||
auto accessor = fetchers::downloadTarball(
|
||||
EvalSettings::resolvePseudoUrl(s)).accessor;
|
||||
auto storePath = fetchToStore(*state.store, SourcePath(accessor), FetchMode::Copy);
|
||||
return state.rootPath(CanonPath(state.store->toRealPath(storePath)));
|
||||
}
|
||||
|
||||
|
|
|
@ -2794,10 +2794,11 @@ std::optional<std::string> EvalState::resolveSearchPathPath(const SearchPath::Pa
|
|||
|
||||
if (EvalSettings::isPseudoUrl(value)) {
|
||||
try {
|
||||
auto storePath = fetchers::downloadTarball(
|
||||
store, EvalSettings::resolvePseudoUrl(value), "source", false).storePath;
|
||||
auto accessor = fetchers::downloadTarball(
|
||||
EvalSettings::resolvePseudoUrl(value)).accessor;
|
||||
auto storePath = fetchToStore(*store, SourcePath(accessor), FetchMode::Copy);
|
||||
res = { store->toRealPath(storePath) };
|
||||
} catch (FileTransferError & e) {
|
||||
} catch (Error & e) {
|
||||
logWarning({
|
||||
.msg = HintFmt("Nix search path entry '%1%' cannot be downloaded, ignoring", value)
|
||||
});
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "tarball.hh"
|
||||
#include "url.hh"
|
||||
#include "value-to-json.hh"
|
||||
#include "fetch-to-store.hh"
|
||||
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
|
@ -473,7 +474,7 @@ static void fetch(EvalState & state, const PosIdx pos, Value * * args, Value & v
|
|||
// https://github.com/NixOS/nix/issues/4313
|
||||
auto storePath =
|
||||
unpack
|
||||
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).storePath
|
||||
? fetchToStore(*state.store, fetchers::downloadTarball(*url).accessor, FetchMode::Copy, name)
|
||||
: fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath;
|
||||
|
||||
if (expectedHash) {
|
||||
|
|
|
@ -466,6 +466,22 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
else
|
||||
throw Error("Commit signature verification on commit %s failed: %s", rev.gitRev(), output);
|
||||
}
|
||||
|
||||
Hash treeHashToNarHash(const Hash & treeHash) override
|
||||
{
|
||||
auto accessor = getAccessor(treeHash, false);
|
||||
|
||||
fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});
|
||||
|
||||
if (auto res = fetchers::getCache()->lookup(cacheKey))
|
||||
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256);
|
||||
|
||||
auto narHash = accessor->hashPath(CanonPath::root);
|
||||
|
||||
fetchers::getCache()->upsert(cacheKey, fetchers::Attrs({{"narHash", narHash.to_string(HashFormat::SRI, true)}}));
|
||||
|
||||
return narHash;
|
||||
}
|
||||
};
|
||||
|
||||
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
|
||||
|
|
|
@ -93,6 +93,12 @@ struct GitRepo
|
|||
virtual void verifyCommit(
|
||||
const Hash & rev,
|
||||
const std::vector<fetchers::PublicKey> & publicKeys) = 0;
|
||||
|
||||
/**
|
||||
* Given a Git tree hash, compute the hash of its NAR
|
||||
* serialisation. This is memoised on-disk.
|
||||
*/
|
||||
virtual Hash treeHashToNarHash(const Hash & treeHash) = 0;
|
||||
};
|
||||
|
||||
ref<GitRepo> getTarballCache();
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
#include "types.hh"
|
||||
#include "split.hh"
|
||||
#include "posix-source-accessor.hh"
|
||||
#include "fs-input-accessor.hh"
|
||||
#include "store-api.hh"
|
||||
#include "git-utils.hh"
|
||||
|
||||
namespace nix::fetchers {
|
||||
|
||||
|
@ -57,10 +60,8 @@ DownloadFileResult downloadFile(
|
|||
throw;
|
||||
}
|
||||
|
||||
// FIXME: write to temporary file.
|
||||
Attrs infoAttrs({
|
||||
{"etag", res.etag},
|
||||
{"url", res.effectiveUri},
|
||||
});
|
||||
|
||||
if (res.immutableUrl)
|
||||
|
@ -91,96 +92,98 @@ DownloadFileResult downloadFile(
|
|||
storePath = std::move(info.path);
|
||||
}
|
||||
|
||||
getCache()->add(
|
||||
*store,
|
||||
inAttrs,
|
||||
infoAttrs,
|
||||
*storePath,
|
||||
locked);
|
||||
|
||||
if (url != res.effectiveUri)
|
||||
/* Cache metadata for all URLs in the redirect chain. */
|
||||
for (auto & url : res.urls) {
|
||||
inAttrs.insert_or_assign("url", url);
|
||||
infoAttrs.insert_or_assign("url", *res.urls.rbegin());
|
||||
getCache()->add(
|
||||
*store,
|
||||
{
|
||||
{"type", "file"},
|
||||
{"url", res.effectiveUri},
|
||||
{"name", name},
|
||||
},
|
||||
inAttrs,
|
||||
infoAttrs,
|
||||
*storePath,
|
||||
locked);
|
||||
}
|
||||
|
||||
return {
|
||||
.storePath = std::move(*storePath),
|
||||
.etag = res.etag,
|
||||
.effectiveUrl = res.effectiveUri,
|
||||
.effectiveUrl = *res.urls.rbegin(),
|
||||
.immutableUrl = res.immutableUrl,
|
||||
};
|
||||
}
|
||||
|
||||
DownloadTarballResult downloadTarball(
|
||||
ref<Store> store,
|
||||
const std::string & url,
|
||||
const std::string & name,
|
||||
bool locked,
|
||||
const Headers & headers)
|
||||
{
|
||||
Attrs inAttrs({
|
||||
{"type", "tarball"},
|
||||
{"_what", "tarballCache"},
|
||||
{"url", url},
|
||||
{"name", name},
|
||||
});
|
||||
|
||||
auto cached = getCache()->lookupExpired(*store, inAttrs);
|
||||
auto cached = getCache()->lookupExpired(inAttrs);
|
||||
|
||||
auto attrsToResult = [&](const Attrs & infoAttrs)
|
||||
{
|
||||
auto treeHash = getRevAttr(infoAttrs, "treeHash");
|
||||
return DownloadTarballResult {
|
||||
.treeHash = treeHash,
|
||||
.lastModified = (time_t) getIntAttr(infoAttrs, "lastModified"),
|
||||
.immutableUrl = maybeGetStrAttr(infoAttrs, "immutableUrl"),
|
||||
.accessor = getTarballCache()->getAccessor(treeHash, false),
|
||||
};
|
||||
};
|
||||
|
||||
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->infoAttrs, "treeHash")))
|
||||
cached.reset();
|
||||
|
||||
if (cached && !cached->expired)
|
||||
return {
|
||||
.storePath = std::move(cached->storePath),
|
||||
.lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"),
|
||||
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
|
||||
};
|
||||
return attrsToResult(cached->infoAttrs);
|
||||
|
||||
auto res = downloadFile(store, url, name, locked, headers);
|
||||
auto _res = std::make_shared<Sync<FileTransferResult>>();
|
||||
|
||||
std::optional<StorePath> unpackedStorePath;
|
||||
time_t lastModified;
|
||||
|
||||
if (cached && res.etag != "" && getStrAttr(cached->infoAttrs, "etag") == res.etag) {
|
||||
unpackedStorePath = std::move(cached->storePath);
|
||||
lastModified = getIntAttr(cached->infoAttrs, "lastModified");
|
||||
} else {
|
||||
Path tmpDir = createTempDir();
|
||||
AutoDelete autoDelete(tmpDir, true);
|
||||
unpackTarfile(store->toRealPath(res.storePath), tmpDir);
|
||||
auto members = readDirectory(tmpDir);
|
||||
if (members.size() != 1)
|
||||
throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url);
|
||||
auto topDir = tmpDir + "/" + members.begin()->name;
|
||||
lastModified = lstat(topDir).st_mtime;
|
||||
PosixSourceAccessor accessor;
|
||||
unpackedStorePath = store->addToStore(name, accessor, CanonPath { topDir }, FileIngestionMethod::Recursive, HashAlgorithm::SHA256, {}, defaultPathFilter, NoRepair);
|
||||
}
|
||||
|
||||
Attrs infoAttrs({
|
||||
{"lastModified", uint64_t(lastModified)},
|
||||
{"etag", res.etag},
|
||||
auto source = sinkToSource([&](Sink & sink) {
|
||||
FileTransferRequest req(url);
|
||||
req.expectedETag = cached ? getStrAttr(cached->infoAttrs, "etag") : "";
|
||||
getFileTransfer()->download(std::move(req), sink,
|
||||
[_res](FileTransferResult r)
|
||||
{
|
||||
*_res->lock() = r;
|
||||
});
|
||||
});
|
||||
|
||||
if (res.immutableUrl)
|
||||
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
|
||||
// TODO: fall back to cached value if download fails.
|
||||
|
||||
getCache()->add(
|
||||
*store,
|
||||
inAttrs,
|
||||
infoAttrs,
|
||||
*unpackedStorePath,
|
||||
locked);
|
||||
/* Note: if the download is cached, `importTarball()` will receive
|
||||
no data, which causes it to import an empty tarball. */
|
||||
TarArchive archive { *source };
|
||||
auto parseSink = getTarballCache()->getFileSystemObjectSink();
|
||||
auto lastModified = unpackTarfileToSink(archive, *parseSink);
|
||||
|
||||
return {
|
||||
.storePath = std::move(*unpackedStorePath),
|
||||
.lastModified = lastModified,
|
||||
.immutableUrl = res.immutableUrl,
|
||||
};
|
||||
auto res(_res->lock());
|
||||
|
||||
Attrs infoAttrs;
|
||||
|
||||
if (res->cached) {
|
||||
infoAttrs = cached->infoAttrs;
|
||||
} else {
|
||||
infoAttrs.insert_or_assign("etag", res->etag);
|
||||
infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
|
||||
infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified));
|
||||
if (res->immutableUrl)
|
||||
infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl);
|
||||
}
|
||||
|
||||
/* Insert a cache entry for every URL in the redirect chain. */
|
||||
for (auto & url : res->urls) {
|
||||
inAttrs.insert_or_assign("url", url);
|
||||
getCache()->upsert(inAttrs, infoAttrs);
|
||||
}
|
||||
|
||||
// FIXME: add a cache entry for immutableUrl? That could allow
|
||||
// cache poisoning.
|
||||
|
||||
return attrsToResult(infoAttrs);
|
||||
}
|
||||
|
||||
// An input scheme corresponding to a curl-downloadable resource.
|
||||
|
@ -198,6 +201,8 @@ struct CurlInputScheme : InputScheme
|
|||
|
||||
virtual bool isValidURL(const ParsedURL & url, bool requireTree) const = 0;
|
||||
|
||||
static const std::set<std::string> specialParams;
|
||||
|
||||
std::optional<Input> inputFromURL(const ParsedURL & _url, bool requireTree) const override
|
||||
{
|
||||
if (!isValidURL(_url, requireTree))
|
||||
|
@ -220,8 +225,12 @@ struct CurlInputScheme : InputScheme
|
|||
if (auto n = string2Int<uint64_t>(*i))
|
||||
input.attrs.insert_or_assign("revCount", *n);
|
||||
|
||||
url.query.erase("rev");
|
||||
url.query.erase("revCount");
|
||||
if (auto i = get(url.query, "lastModified"))
|
||||
if (auto n = string2Int<uint64_t>(*i))
|
||||
input.attrs.insert_or_assign("lastModified", *n);
|
||||
|
||||
for (auto & param : allowedAttrs())
|
||||
url.query.erase(param);
|
||||
|
||||
input.attrs.insert_or_assign("type", std::string { schemeName() });
|
||||
input.attrs.insert_or_assign("url", url.to_string());
|
||||
|
@ -275,10 +284,20 @@ struct FileInputScheme : CurlInputScheme
|
|||
: (!requireTree && !hasTarballExtension(url.path)));
|
||||
}
|
||||
|
||||
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
|
||||
std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
|
||||
{
|
||||
auto input(_input);
|
||||
|
||||
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
|
||||
return {std::move(file.storePath), input};
|
||||
|
||||
auto narHash = store->queryPathInfo(file.storePath)->narHash;
|
||||
input.attrs.insert_or_assign("narHash", narHash.to_string(HashFormat::SRI, true));
|
||||
|
||||
auto accessor = makeStorePathAccessor(store, file.storePath);
|
||||
|
||||
accessor->setPathDisplay("«" + input.to_string() + "»");
|
||||
|
||||
return {accessor, input};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -296,11 +315,13 @@ struct TarballInputScheme : CurlInputScheme
|
|||
: (requireTree || hasTarballExtension(url.path)));
|
||||
}
|
||||
|
||||
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
|
||||
std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
|
||||
{
|
||||
Input input(_input);
|
||||
auto url = getStrAttr(input.attrs, "url");
|
||||
auto result = downloadTarball(store, url, input.getName(), false);
|
||||
auto input(_input);
|
||||
|
||||
auto result = downloadTarball(getStrAttr(input.attrs, "url"), {});
|
||||
|
||||
result.accessor->setPathDisplay("«" + input.to_string() + "»");
|
||||
|
||||
if (result.immutableUrl) {
|
||||
auto immutableInput = Input::fromURL(*result.immutableUrl);
|
||||
|
@ -314,7 +335,10 @@ struct TarballInputScheme : CurlInputScheme
|
|||
if (result.lastModified && !input.attrs.contains("lastModified"))
|
||||
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));
|
||||
|
||||
return {result.storePath, std::move(input)};
|
||||
input.attrs.insert_or_assign("narHash",
|
||||
getTarballCache()->treeHashToNarHash(result.treeHash).to_string(HashFormat::SRI, true));
|
||||
|
||||
return {result.accessor, input};
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
|
||||
#include "types.hh"
|
||||
#include "path.hh"
|
||||
#include "hash.hh"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace nix {
|
||||
class Store;
|
||||
struct InputAccessor;
|
||||
}
|
||||
|
||||
namespace nix::fetchers {
|
||||
|
@ -28,16 +30,18 @@ DownloadFileResult downloadFile(
|
|||
|
||||
struct DownloadTarballResult
|
||||
{
|
||||
StorePath storePath;
|
||||
Hash treeHash;
|
||||
time_t lastModified;
|
||||
std::optional<std::string> immutableUrl;
|
||||
ref<InputAccessor> accessor;
|
||||
};
|
||||
|
||||
/**
|
||||
* Download and import a tarball into the Git cache. The result is the
|
||||
* Git tree hash of the root directory.
|
||||
*/
|
||||
DownloadTarballResult downloadTarball(
|
||||
ref<Store> store,
|
||||
const std::string & url,
|
||||
const std::string & name,
|
||||
bool locked,
|
||||
const Headers & headers = {});
|
||||
|
||||
}
|
||||
|
|
|
@ -106,6 +106,8 @@ struct curlFileTransfer : public FileTransfer
|
|||
this->result.data.append(data);
|
||||
})
|
||||
{
|
||||
result.urls.push_back(request.uri);
|
||||
|
||||
requestHeaders = curl_slist_append(requestHeaders, "Accept-Encoding: zstd, br, gzip, deflate, bzip2, xz");
|
||||
if (!request.expectedETag.empty())
|
||||
requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str());
|
||||
|
@ -182,6 +184,14 @@ struct curlFileTransfer : public FileTransfer
|
|||
return ((TransferItem *) userp)->writeCallback(contents, size, nmemb);
|
||||
}
|
||||
|
||||
void appendCurrentUrl()
|
||||
{
|
||||
char * effectiveUriCStr = nullptr;
|
||||
curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr);
|
||||
if (effectiveUriCStr && *result.urls.rbegin() != effectiveUriCStr)
|
||||
result.urls.push_back(effectiveUriCStr);
|
||||
}
|
||||
|
||||
size_t headerCallback(void * contents, size_t size, size_t nmemb)
|
||||
{
|
||||
size_t realSize = size * nmemb;
|
||||
|
@ -196,6 +206,7 @@ struct curlFileTransfer : public FileTransfer
|
|||
statusMsg = trim(match.str(1));
|
||||
acceptRanges = false;
|
||||
encoding = "";
|
||||
appendCurrentUrl();
|
||||
} else {
|
||||
|
||||
auto i = line.find(':');
|
||||
|
@ -360,14 +371,11 @@ struct curlFileTransfer : public FileTransfer
|
|||
{
|
||||
auto httpStatus = getHTTPStatus();
|
||||
|
||||
char * effectiveUriCStr = nullptr;
|
||||
curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr);
|
||||
if (effectiveUriCStr)
|
||||
result.effectiveUri = effectiveUriCStr;
|
||||
|
||||
debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes",
|
||||
request.verb(), request.uri, code, httpStatus, result.bodySize);
|
||||
|
||||
appendCurrentUrl();
|
||||
|
||||
if (decompressionSink) {
|
||||
try {
|
||||
decompressionSink->finish();
|
||||
|
@ -779,7 +787,10 @@ FileTransferResult FileTransfer::upload(const FileTransferRequest & request)
|
|||
return enqueueFileTransfer(request).get();
|
||||
}
|
||||
|
||||
void FileTransfer::download(FileTransferRequest && request, Sink & sink)
|
||||
void FileTransfer::download(
|
||||
FileTransferRequest && request,
|
||||
Sink & sink,
|
||||
std::function<void(FileTransferResult)> resultCallback)
|
||||
{
|
||||
/* Note: we can't call 'sink' via request.dataCallback, because
|
||||
that would cause the sink to execute on the fileTransfer
|
||||
|
@ -829,11 +840,13 @@ void FileTransfer::download(FileTransferRequest && request, Sink & sink)
|
|||
};
|
||||
|
||||
enqueueFileTransfer(request,
|
||||
{[_state](std::future<FileTransferResult> fut) {
|
||||
{[_state, resultCallback{std::move(resultCallback)}](std::future<FileTransferResult> fut) {
|
||||
auto state(_state->lock());
|
||||
state->quit = true;
|
||||
try {
|
||||
fut.get();
|
||||
auto res = fut.get();
|
||||
if (resultCallback)
|
||||
resultCallback(std::move(res));
|
||||
} catch (...) {
|
||||
state->exc = std::current_exception();
|
||||
}
|
||||
|
|
|
@ -75,14 +75,34 @@ struct FileTransferRequest
|
|||
|
||||
struct FileTransferResult
|
||||
{
|
||||
/**
|
||||
* Whether this is a cache hit (i.e. the ETag supplied in the
|
||||
* request is still valid). If so, `data` is empty.
|
||||
*/
|
||||
bool cached = false;
|
||||
|
||||
/**
|
||||
* The ETag of the object.
|
||||
*/
|
||||
std::string etag;
|
||||
std::string effectiveUri;
|
||||
|
||||
/**
|
||||
* All URLs visited in the redirect chain.
|
||||
*/
|
||||
std::vector<std::string> urls;
|
||||
|
||||
/**
|
||||
* The response body.
|
||||
*/
|
||||
std::string data;
|
||||
|
||||
uint64_t bodySize = 0;
|
||||
/* An "immutable" URL for this resource (i.e. one whose contents
|
||||
will never change), as returned by the `Link: <url>;
|
||||
rel="immutable"` header. */
|
||||
|
||||
/**
|
||||
* An "immutable" URL for this resource (i.e. one whose contents
|
||||
* will never change), as returned by the `Link: <url>;
|
||||
* rel="immutable"` header.
|
||||
*/
|
||||
std::optional<std::string> immutableUrl;
|
||||
};
|
||||
|
||||
|
@ -116,7 +136,10 @@ struct FileTransfer
|
|||
* Download a file, writing its data to a sink. The sink will be
|
||||
* invoked on the thread of the caller.
|
||||
*/
|
||||
void download(FileTransferRequest && request, Sink & sink);
|
||||
void download(
|
||||
FileTransferRequest && request,
|
||||
Sink & sink,
|
||||
std::function<void(FileTransferResult)> resultCallback = {});
|
||||
|
||||
enum Error { NotFound, Forbidden, Misc, Transient, Interrupted };
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue