Simplify the fetcher cache

This commit is contained in:
Eelco Dolstra 2024-04-10 20:59:18 +02:00
parent d084c1cb41
commit aad11f4496
8 changed files with 187 additions and 173 deletions

View file

@ -11,12 +11,11 @@ namespace nix::fetchers {
/* SQL that is executed against the fetcher cache database right after it
   is opened (see `state->db.exec(schema)`). It creates the `Cache` table
   when it does not exist yet. */
static const char * schema = R"sql(
create table if not exists Cache (
input text not null,
info text not null,
path text not null,
immutable integer not null, /* obsolete */
domain text not null,
key text not null,
value text not null,
timestamp integer not null,
primary key (input)
primary key (domain, key)
);
)sql";
@ -28,7 +27,7 @@ struct CacheImpl : Cache
/* The database handle together with its prepared statements. The whole
   struct is wrapped in `Sync<State>` and reached through `_state.lock()`. */
struct State
{
SQLite db;
SQLiteStmt add, lookup;
// Prepared statements: `upsert` writes a row, `lookup` reads one back.
SQLiteStmt upsert, lookup;
};
Sync<State> _state;
@ -37,133 +36,134 @@ struct CacheImpl : Cache
{
auto state(_state.lock());
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite";
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
createDirs(dirOf(dbPath));
state->db = SQLite(dbPath);
state->db.isCache();
state->db.exec(schema);
state->add.create(state->db,
"insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, false, ?)");
state->upsert.create(state->db,
"insert or replace into Cache(domain, key, value, timestamp) values (?, ?, ?, ?)");
state->lookup.create(state->db,
"select info, path, immutable, timestamp from Cache where input = ?");
"select value, timestamp from Cache where domain = ? and key = ?");
}
/**
 * Store `value` under (`domain`, `key`). The prepared statement is an
 * `insert or replace`, so an existing row with the same primary key is
 * overwritten. Key and value are serialised to JSON text, and the row is
 * stamped with the current time.
 */
void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) override
std::string_view domain,
const Attrs & key,
const Attrs & value) override
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
("") // no path
// Bind the statement parameters in order: domain, key, value, timestamp.
_state.lock()->upsert.use()
(domain)
(attrsToJSON(key).dump())
(attrsToJSON(value).dump())
(time(0)).exec();
}
/**
 * Return the cached value for (`domain`, `key`) regardless of its age:
 * the `expired` flag reported by lookupExpired() is deliberately not
 * consulted. Returns an empty optional when there is no entry.
 */
std::optional<Attrs> lookup(const Attrs & inAttrs) override
std::optional<Attrs> lookup(
std::string_view domain,
const Attrs & key) override
{
if (auto res = lookupExpired(inAttrs))
return std::move(res->infoAttrs);
if (auto res = lookupExpired(domain, key))
return std::move(res->value);
return {};
}
/**
 * Return the cached value for the key only if lookupExpired() does not
 * flag it as expired. An expired entry is logged at debug level and
 * reported as a miss (empty optional).
 */
std::optional<Attrs> lookupWithTTL(const Attrs & inAttrs) override
std::optional<Attrs> lookupWithTTL(
std::string_view domain,
const Attrs & key) override
{
if (auto res = lookupExpired(inAttrs)) {
if (auto res = lookupExpired(domain, key)) {
if (!res->expired)
return std::move(res->infoAttrs);
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
}
return {};
}
/* Attrs-only lookup keyed by the JSON dump of `inAttrs`. A row whose
   `immutable` column (index 2) is non-zero is never reported as expired;
   otherwise it is expired when the TTL setting is 0 or the row's
   timestamp plus the TTL lies in the past. */
std::optional<Result2> lookupExpired(const Attrs & inAttrs) override
{
auto state(_state.lock());
auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto stmt(state->lookup.use()(inAttrsJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
return {};
}
auto infoJSON = stmt.getStr(0);
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
debug("using cache entry '%s' -> '%s'", inAttrsJSON, infoJSON);
return Result2 {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
};
}
/* Write a row that also records a store path (printed via
   `store.printStorePath`) next to the JSON-encoded attributes. */
void add(
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath) override
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
(store.printStorePath(storePath))
(time(0)).exec();
}
/* Store-path flavoured lookup: yields the attributes and the store path
   as a pair, and treats an expired entry as a miss. */
std::optional<std::pair<Attrs, StorePath>> lookup(
Store & store,
const Attrs & inAttrs) override
{
if (auto res = lookupExpired(store, inAttrs)) {
if (!res->expired)
return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath));
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
return std::move(res->value);
debug("ignoring expired cache entry '%s:%s'",
domain, attrsToJSON(key).dump());
}
return {};
}
/**
 * Fetch the row for (`domain`, `key`) and report it together with an
 * `expired` flag instead of filtering on age. Returns an empty optional
 * when no row exists. The key is matched by its JSON dump.
 */
std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) override
std::string_view domain,
const Attrs & key) override
{
auto state(_state.lock());
auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto keyJSON = attrsToJSON(key).dump();
auto stmt(state->lookup.use()(inAttrsJSON));
auto stmt(state->lookup.use()(domain)(keyJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
debug("did not find cache entry for '%s:%s'", domain, keyJSON);
return {};
}
auto infoJSON = stmt.getStr(0);
auto storePath = store.parseStorePath(stmt.getStr(1));
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
// Column indices follow the lookup statement's select list
// ("select value, timestamp"): 0 = value, 1 = timestamp.
auto valueJSON = stmt.getStr(0);
auto timestamp = stmt.getInt(1);
store.addTempRoot(storePath);
if (!store.isValidPath(storePath)) {
debug("using cache entry '%s:%s' -> '%s'", domain, keyJSON, valueJSON);
// A TTL setting of 0 makes every entry count as expired; otherwise the
// entry is expired once its timestamp plus the TTL lies in the past.
return Result {
.expired = settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0),
.value = jsonToAttrs(nlohmann::json::parse(valueJSON)),
};
}
void upsert(
std::string_view domain,
Attrs key,
Store & store,
Attrs value,
const StorePath & storePath)
{
/* Add the store prefix to the cache key to handle multiple
store prefixes. */
key.insert_or_assign("store", store.storeDir);
value.insert_or_assign("storePath", (std::string) storePath.to_string());
upsert(domain, key, value);
}
/**
 * Look up an entry written by the store-path upsert() and return its
 * attributes together with the store path. An entry whose store path is
 * not valid in `store` is reported as a miss. The `expired` flag from
 * lookupExpired() is passed through unchanged, so the result may be
 * expired.
 */
std::optional<ResultWithStorePath> lookupStorePath(
std::string_view domain,
Attrs key,
Store & store) override
{
// Same "store" key component that the store-path upsert() adds.
key.insert_or_assign("store", store.storeDir);
auto res = lookupExpired(domain, key);
if (!res) return std::nullopt;
// Split the store path back out of the stored value so callers only see
// the attributes they originally cached.
auto storePathS = getStrAttr(res->value, "storePath");
res->value.erase("storePath");
ResultWithStorePath res2(*res, StorePath(storePathS));
// NOTE(review): the temp root is registered before the validity check,
// presumably so the path cannot be garbage-collected in between — confirm.
store.addTempRoot(res2.storePath);
if (!store.isValidPath(res2.storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
return {};
debug("ignoring disappeared cache entry '%s' -> '%s'",
attrsToJSON(key).dump(),
store.printStorePath(res2.storePath));
return std::nullopt;
}
debug("using cache entry '%s' -> '%s', '%s'",
inAttrsJSON, infoJSON, store.printStorePath(storePath));
attrsToJSON(key).dump(),
attrsToJSON(res2.value).dump(),
store.printStorePath(res2.storePath));
return Result {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
.storePath = std::move(storePath)
};
return res2;
}
std::optional<ResultWithStorePath> lookupStorePathWithTTL(
std::string_view domain,
Attrs key,
Store & store) override
{
auto res = lookupStorePath(domain, std::move(key), store);
return res && !res->expired ? res : std::nullopt;
}
};

View file

@ -19,56 +19,73 @@ struct Cache
* Attrs to Attrs.
*/
virtual void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) = 0;
std::string_view domain,
const Attrs & key,
const Attrs & value) = 0;
/**
* Look up a key with infinite TTL.
*/
virtual std::optional<Attrs> lookup(
const Attrs & inAttrs) = 0;
std::string_view domain,
const Attrs & key) = 0;
/**
* Look up a key. Return nothing if the entry is older than
* `settings.tarballTtl`.
*/
virtual std::optional<Attrs> lookupWithTTL(
const Attrs & inAttrs) = 0;
std::string_view domain,
const Attrs & key) = 0;
/**
 * Outcome of a cache lookup that does not filter on age: the cached
 * attributes plus a flag saying whether the entry counts as expired.
 */
struct Result2
struct Result
{
// Set by the lookup according to the entry's timestamp and the TTL setting.
bool expired = false;
Attrs infoAttrs;
// The attributes that were stored for the key.
Attrs value;
};
/**
* Look up a key. The result's `expired` flag tells whether the
* entry is older than `settings.tarballTtl`.
*/
virtual std::optional<Result2> lookupExpired(
const Attrs & inAttrs) = 0;
virtual std::optional<Result> lookupExpired(
std::string_view domain,
const Attrs & key) = 0;
/* Old cache for things that have a store path. */
virtual void add(
/**
* Insert a cache entry that has a store path associated with
* it. Such cache entries are always considered stale if the
* associated store path is invalid.
*/
virtual void upsert(
std::string_view domain,
Attrs key,
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
Attrs value,
const StorePath & storePath) = 0;
virtual std::optional<std::pair<Attrs, StorePath>> lookup(
Store & store,
const Attrs & inAttrs) = 0;
/**
 * A lookup result that additionally carries the store path associated
 * with the cache entry.
 */
struct Result
struct ResultWithStorePath : Result
{
bool expired = false;
Attrs infoAttrs;
// Store path recorded alongside the cached attributes.
StorePath storePath;
};
virtual std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) = 0;
/**
* Look up a store path in the cache. The returned store path will
* be valid, but it may be expired.
*/
virtual std::optional<ResultWithStorePath> lookupStorePath(
std::string_view domain,
Attrs key,
Store & store) = 0;
/**
* Look up a store path in the cache. Return nothing if the entry
* is older than `settings.tarballTtl`.
*/
virtual std::optional<ResultWithStorePath> lookupStorePathWithTTL(
std::string_view domain,
Attrs key,
Store & store) = 0;
};
ref<Cache> getCache();

View file

@ -16,20 +16,19 @@ StorePath fetchToStore(
// FIXME: add an optimisation for the case where the accessor is
// an FSInputAccessor pointing to a store path.
auto domain = "fetchToStore";
std::optional<fetchers::Attrs> cacheKey;
if (!filter && path.accessor->fingerprint) {
cacheKey = fetchers::Attrs{
{"_what", "fetchToStore"},
{"store", store.storeDir},
{"name", std::string{name}},
{"fingerprint", *path.accessor->fingerprint},
{"method", std::string{method.render()}},
{"path", path.path.abs()}
};
if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) {
if (auto res = fetchers::getCache()->lookupStorePath(domain, *cacheKey, store)) {
debug("store path cache hit for '%s'", path);
return res->second;
return res->storePath;
}
} else
debug("source path '%s' is uncacheable", path);
@ -47,7 +46,7 @@ StorePath fetchToStore(
name, *path.accessor, path.path, method, HashAlgorithm::SHA256, {}, filter2, repair);
if (cacheKey && mode == FetchMode::Copy)
fetchers::getCache()->add(store, *cacheKey, {}, storePath);
fetchers::getCache()->upsert(domain, *cacheKey, store, {}, storePath);
return storePath;
}

View file

@ -456,14 +456,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
{
auto accessor = getAccessor(treeHash, false);
fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});
auto domain = "treeHashToNarHash";
fetchers::Attrs cacheKey({{"treeHash", treeHash.gitRev()}});
if (auto res = fetchers::getCache()->lookup(cacheKey))
if (auto res = fetchers::getCache()->lookup(domain, cacheKey))
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256);
auto narHash = accessor->hashPath(CanonPath::root);
fetchers::getCache()->upsert(cacheKey, fetchers::Attrs({{"narHash", narHash.to_string(HashFormat::SRI, true)}}));
fetchers::getCache()->upsert(domain, cacheKey, fetchers::Attrs({{"narHash", narHash.to_string(HashFormat::SRI, true)}}));
return narHash;
}

View file

@ -225,11 +225,13 @@ struct GitArchiveInputScheme : InputScheme
auto cache = getCache();
Attrs treeHashKey{{"_what", "gitRevToTreeHash"}, {"rev", rev->gitRev()}};
Attrs lastModifiedKey{{"_what", "gitRevToLastModified"}, {"rev", rev->gitRev()}};
auto treeHashDomain = "gitRevToTreeHash";
Attrs treeHashKey{{"rev", rev->gitRev()}};
auto lastModifiedDomain = "gitRevToLastModified";
Attrs lastModifiedKey{{"rev", rev->gitRev()}};
if (auto treeHashAttrs = cache->lookup(treeHashKey)) {
if (auto lastModifiedAttrs = cache->lookup(lastModifiedKey)) {
if (auto treeHashAttrs = cache->lookup(treeHashDomain, treeHashKey)) {
if (auto lastModifiedAttrs = cache->lookup(lastModifiedDomain, lastModifiedKey)) {
auto treeHash = getRevAttr(*treeHashAttrs, "treeHash");
auto lastModified = getIntAttr(*lastModifiedAttrs, "lastModified");
if (getTarballCache()->hasObject(treeHash))
@ -257,8 +259,8 @@ struct GitArchiveInputScheme : InputScheme
.lastModified = lastModified
};
cache->upsert(treeHashKey, Attrs{{"treeHash", tarballInfo.treeHash.gitRev()}});
cache->upsert(lastModifiedKey, Attrs{{"lastModified", (uint64_t) tarballInfo.lastModified}});
cache->upsert(treeHashDomain, treeHashKey, Attrs{{"treeHash", tarballInfo.treeHash.gitRev()}});
cache->upsert(lastModifiedDomain, lastModifiedKey, Attrs{{"lastModified", (uint64_t) tarballInfo.lastModified}});
#if 0
if (upstreamTreeHash != tarballInfo.treeHash)

View file

@ -23,21 +23,22 @@ DownloadFileResult downloadFile(
{
// FIXME: check store
Attrs inAttrs({
{"type", "file"},
auto domain = "file";
Attrs key({
{"url", url},
{"name", name},
});
auto cached = getCache()->lookupExpired(*store, inAttrs);
auto cached = getCache()->lookupStorePath(domain, key, *store);
auto useCached = [&]() -> DownloadFileResult
{
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
.etag = getStrAttr(cached->value, "etag"),
.effectiveUrl = getStrAttr(cached->value, "url"),
.immutableUrl = maybeGetStrAttr(cached->value, "immutableUrl"),
};
};
@ -47,7 +48,7 @@ DownloadFileResult downloadFile(
FileTransferRequest request(url);
request.headers = headers;
if (cached)
request.expectedETag = getStrAttr(cached->infoAttrs, "etag");
request.expectedETag = getStrAttr(cached->value, "etag");
FileTransferResult res;
try {
res = getFileTransfer()->download(request);
@ -93,13 +94,9 @@ DownloadFileResult downloadFile(
/* Cache metadata for all URLs in the redirect chain. */
for (auto & url : res.urls) {
inAttrs.insert_or_assign("url", url);
key.insert_or_assign("url", url);
infoAttrs.insert_or_assign("url", *res.urls.rbegin());
getCache()->add(
*store,
inAttrs,
infoAttrs,
*storePath);
getCache()->upsert(domain, key, *store, infoAttrs, *storePath);
}
return {
@ -114,12 +111,12 @@ DownloadTarballResult downloadTarball(
const std::string & url,
const Headers & headers)
{
Attrs inAttrs({
{"_what", "tarballCache"},
auto domain = "tarball";
Attrs cacheKey{
{"url", url},
});
};
auto cached = getCache()->lookupExpired(inAttrs);
auto cached = getCache()->lookupExpired(domain, cacheKey);
auto attrsToResult = [&](const Attrs & infoAttrs)
{
@ -132,19 +129,19 @@ DownloadTarballResult downloadTarball(
};
};
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->infoAttrs, "treeHash")))
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->value, "treeHash")))
cached.reset();
if (cached && !cached->expired)
/* We previously downloaded this tarball and it's younger than
`tarballTtl`, so no need to check the server. */
return attrsToResult(cached->infoAttrs);
return attrsToResult(cached->value);
auto _res = std::make_shared<Sync<FileTransferResult>>();
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest req(url);
req.expectedETag = cached ? getStrAttr(cached->infoAttrs, "etag") : "";
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
getFileTransfer()->download(std::move(req), sink,
[_res](FileTransferResult r)
{
@ -167,7 +164,7 @@ DownloadTarballResult downloadTarball(
if (res->cached) {
/* The server says that the previously downloaded version is
still current. */
infoAttrs = cached->infoAttrs;
infoAttrs = cached->value;
} else {
infoAttrs.insert_or_assign("etag", res->etag);
infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
@ -178,8 +175,8 @@ DownloadTarballResult downloadTarball(
/* Insert a cache entry for every URL in the redirect chain. */
for (auto & url : res->urls) {
inAttrs.insert_or_assign("url", url);
getCache()->upsert(inAttrs, infoAttrs);
cacheKey.insert_or_assign("url", url);
getCache()->upsert(domain, cacheKey, infoAttrs);
}
// FIXME: add a cache entry for immutableUrl? That could allow

View file

@ -427,34 +427,36 @@ struct GitInputScheme : InputScheme
/**
 * Return the last-modified value of revision `rev`, memoised in the
 * fetcher cache. The entry is keyed by the revision and read with
 * `lookup()`, i.e. without applying a TTL. On a miss the value is
 * obtained from the repository at `repoDir` and written back to the cache.
 */
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
Attrs key{{"_what", "gitLastModified"}, {"rev", rev.gitRev()}};
auto domain = "gitLastModified";
Attrs key{{"rev", rev.gitRev()}};
auto cache = getCache();
if (auto res = cache->lookup(key))
if (auto res = cache->lookup(domain, key))
return getIntAttr(*res, "lastModified");
// Cache miss: ask the repository, then remember the answer.
auto lastModified = GitRepo::openRepo(repoDir)->getLastModified(rev);
cache->upsert(key, Attrs{{"lastModified", lastModified}});
cache->upsert(domain, key, {{"lastModified", lastModified}});
return lastModified;
}
/**
 * Return the revision count of `rev`, memoised in the fetcher cache.
 * The entry is keyed by the revision and read with `lookup()`, i.e.
 * without applying a TTL. On a miss the count is computed from the
 * repository at `repoDir` (reported as an activity at chatty log level)
 * and written back to the cache.
 */
uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
Attrs key{{"_what", "gitRevCount"}, {"rev", rev.gitRev()}};
auto domain = "gitRevCount";
Attrs key{{"rev", rev.gitRev()}};
auto cache = getCache();
if (auto revCountAttrs = cache->lookup(key))
if (auto revCountAttrs = cache->lookup(domain, key))
return getIntAttr(*revCountAttrs, "revCount");
// Cache miss: announce the work, compute the count, then remember it.
Activity act(*logger, lvlChatty, actUnknown, fmt("getting Git revision count of '%s'", repoInfo.url));
auto revCount = GitRepo::openRepo(repoDir)->getRevCount(rev);
cache->upsert(key, Attrs{{"revCount", revCount}});
cache->upsert(domain, key, Attrs{{"revCount", revCount}});
return revCount;
}

View file

@ -224,13 +224,13 @@ struct MercurialInputScheme : InputScheme
if (!input.getRef()) input.attrs.insert_or_assign("ref", "default");
auto revInfoCacheKey = [&](const Hash & rev)
auto revInfoDomain = "hgRev";
auto revInfoKey = [&](const Hash & rev)
{
if (rev.algo != HashAlgorithm::SHA1)
throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", rev.to_string(HashFormat::Base16, true));
return Attrs{
{"_what", "hgRev"},
{"store", store->storeDir},
{"name", name},
{"rev", input.getRev()->gitRev()}
@ -246,21 +246,21 @@ struct MercurialInputScheme : InputScheme
};
/* Check the cache for the most recent rev for this URL/ref. */
Attrs refToRevCacheKey{
{"_what", "hgRefToRev"},
auto refToRevDomain = "hgRefToRev";
Attrs refToRevKey{
{"url", actualUrl},
{"ref", *input.getRef()}
};
if (!input.getRev()) {
if (auto res = getCache()->lookupWithTTL(refToRevCacheKey))
if (auto res = getCache()->lookupWithTTL(refToRevDomain, refToRevKey))
input.attrs.insert_or_assign("rev", getRevAttr(*res, "rev").gitRev());
}
/* If we have a rev, check if we have a cached store path. */
if (auto rev = input.getRev()) {
if (auto res = getCache()->lookupExpired(*store, revInfoCacheKey(*rev)))
return makeResult(res->infoAttrs, res->storePath);
if (auto res = getCache()->lookupStorePath(revInfoDomain, revInfoKey(*rev), *store))
return makeResult(res->value, res->storePath);
}
Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(HashAlgorithm::SHA256, actualUrl).to_string(HashFormat::Nix32, false));
@ -309,8 +309,8 @@ struct MercurialInputScheme : InputScheme
/* Now that we have the rev, check the cache again for a
cached store path. */
if (auto res = getCache()->lookupExpired(*store, revInfoCacheKey(rev)))
return makeResult(res->infoAttrs, res->storePath);
if (auto res = getCache()->lookupStorePath(revInfoDomain, revInfoKey(rev), *store))
return makeResult(res->value, res->storePath);
Path tmpDir = createTempDir();
AutoDelete delTmpDir(tmpDir, true);
@ -327,13 +327,9 @@ struct MercurialInputScheme : InputScheme
});
if (!origRev)
getCache()->upsert(refToRevCacheKey, {{"rev", rev.gitRev()}});
getCache()->upsert(refToRevDomain, refToRevKey, {{"rev", rev.gitRev()}});
getCache()->add(
*store,
revInfoCacheKey(rev),
infoAttrs,
storePath);
getCache()->upsert(revInfoDomain, revInfoKey(rev), *store, infoAttrs, storePath);
return makeResult(infoAttrs, std::move(storePath));
}