Merge pull request #10465 from edolstra/remove-locked

Fetcher cache cleanups
This commit is contained in:
Eelco Dolstra 2024-05-06 21:32:26 +02:00 committed by GitHub
commit da3381d51f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 200 additions and 214 deletions

View file

@ -11,12 +11,11 @@ namespace nix::fetchers {
static const char * schema = R"sql(
create table if not exists Cache (
input text not null,
info text not null,
path text not null,
immutable integer not null,
domain text not null,
key text not null,
value text not null,
timestamp integer not null,
primary key (input)
primary key (domain, key)
);
)sql";
@ -28,7 +27,7 @@ struct CacheImpl : Cache
struct State
{
SQLite db;
SQLiteStmt add, lookup;
SQLiteStmt upsert, lookup;
};
Sync<State> _state;
@ -37,137 +36,130 @@ struct CacheImpl : Cache
{
auto state(_state.lock());
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite";
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
createDirs(dirOf(dbPath));
state->db = SQLite(dbPath);
state->db.isCache();
state->db.exec(schema);
state->add.create(state->db,
"insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, ?, ?)");
state->upsert.create(state->db,
"insert or replace into Cache(domain, key, value, timestamp) values (?, ?, ?, ?)");
state->lookup.create(state->db,
"select info, path, immutable, timestamp from Cache where input = ?");
"select value, timestamp from Cache where domain = ? and key = ?");
}
void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) override
const Key & key,
const Attrs & value) override
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
("") // no path
(false)
_state.lock()->upsert.use()
(key.first)
(attrsToJSON(key.second).dump())
(attrsToJSON(value).dump())
(time(0)).exec();
}
std::optional<Attrs> lookup(const Attrs & inAttrs) override
std::optional<Attrs> lookup(
const Key & key) override
{
if (auto res = lookupExpired(inAttrs))
return std::move(res->infoAttrs);
if (auto res = lookupExpired(key))
return std::move(res->value);
return {};
}
std::optional<Attrs> lookupWithTTL(const Attrs & inAttrs) override
std::optional<Attrs> lookupWithTTL(
const Key & key) override
{
if (auto res = lookupExpired(inAttrs)) {
if (auto res = lookupExpired(key)) {
if (!res->expired)
return std::move(res->infoAttrs);
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
}
return {};
}
std::optional<Result2> lookupExpired(const Attrs & inAttrs) override
{
auto state(_state.lock());
auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto stmt(state->lookup.use()(inAttrsJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
return {};
}
auto infoJSON = stmt.getStr(0);
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
debug("using cache entry '%s' -> '%s'", inAttrsJSON, infoJSON);
return Result2 {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
};
}
void add(
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
bool locked) override
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
(store.printStorePath(storePath))
(locked)
(time(0)).exec();
}
std::optional<std::pair<Attrs, StorePath>> lookup(
Store & store,
const Attrs & inAttrs) override
{
if (auto res = lookupExpired(store, inAttrs)) {
if (!res->expired)
return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath));
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
return std::move(res->value);
debug("ignoring expired cache entry '%s:%s'",
key.first, attrsToJSON(key.second).dump());
}
return {};
}
std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) override
const Key & key) override
{
auto state(_state.lock());
auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto keyJSON = attrsToJSON(key.second).dump();
auto stmt(state->lookup.use()(inAttrsJSON));
auto stmt(state->lookup.use()(key.first)(keyJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
debug("did not find cache entry for '%s:%s'", key.first, keyJSON);
return {};
}
auto infoJSON = stmt.getStr(0);
auto storePath = store.parseStorePath(stmt.getStr(1));
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
auto valueJSON = stmt.getStr(0);
auto timestamp = stmt.getInt(1);
store.addTempRoot(storePath);
if (!store.isValidPath(storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
return {};
}
debug("using cache entry '%s' -> '%s', '%s'",
inAttrsJSON, infoJSON, store.printStorePath(storePath));
debug("using cache entry '%s:%s' -> '%s'", key.first, keyJSON, valueJSON);
return Result {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
.storePath = std::move(storePath)
.expired = settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0),
.value = jsonToAttrs(nlohmann::json::parse(valueJSON)),
};
}
void upsert(
Key key,
Store & store,
Attrs value,
const StorePath & storePath)
{
/* Add the store prefix to the cache key to handle multiple
store prefixes. */
key.second.insert_or_assign("store", store.storeDir);
value.insert_or_assign("storePath", (std::string) storePath.to_string());
upsert(key, value);
}
std::optional<ResultWithStorePath> lookupStorePath(
Key key,
Store & store) override
{
key.second.insert_or_assign("store", store.storeDir);
auto res = lookupExpired(key);
if (!res) return std::nullopt;
auto storePathS = getStrAttr(res->value, "storePath");
res->value.erase("storePath");
ResultWithStorePath res2(*res, StorePath(storePathS));
store.addTempRoot(res2.storePath);
if (!store.isValidPath(res2.storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s:%s' -> '%s'",
key.first,
attrsToJSON(key.second).dump(),
store.printStorePath(res2.storePath));
return std::nullopt;
}
debug("using cache entry '%s:%s' -> '%s', '%s'",
key.first,
attrsToJSON(key.second).dump(),
attrsToJSON(res2.value).dump(),
store.printStorePath(res2.storePath));
return res2;
}
std::optional<ResultWithStorePath> lookupStorePathWithTTL(
Key key,
Store & store) override
{
auto res = lookupStorePath(std::move(key), store);
return res && !res->expired ? res : std::nullopt;
}
};
ref<Cache> getCache()

View file

@ -15,61 +15,80 @@ struct Cache
virtual ~Cache() { }
/**
* Add a value to the cache. The cache is an arbitrary mapping of
* Attrs to Attrs.
* A domain is a partition of the key/value cache for a particular
* purpose, e.g. git revision to revcount.
*/
using Domain = std::string_view;
/**
* A cache key is a domain and an arbitrary set of attributes.
*/
using Key = std::pair<Domain, Attrs>;
/**
* Add a key/value pair to the cache.
*/
virtual void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) = 0;
const Key & key,
const Attrs & value) = 0;
/**
* Look up a key with infinite TTL.
*/
virtual std::optional<Attrs> lookup(
const Attrs & inAttrs) = 0;
const Key & key) = 0;
/**
* Look up a key. Return nothing if its TTL has exceeded
* `settings.tarballTTL`.
*/
virtual std::optional<Attrs> lookupWithTTL(
const Attrs & inAttrs) = 0;
const Key & key) = 0;
struct Result2
struct Result
{
bool expired = false;
Attrs infoAttrs;
Attrs value;
};
/**
* Look up a key. Return a bool denoting whether its TTL has
* exceeded `settings.tarballTTL`.
*/
virtual std::optional<Result2> lookupExpired(
const Attrs & inAttrs) = 0;
virtual std::optional<Result> lookupExpired(
const Key & key) = 0;
/* Old cache for things that have a store path. */
virtual void add(
/**
* Insert a cache entry that has a store path associated with
* it. Such cache entries are always considered stale if the
* associated store path is invalid.
*/
virtual void upsert(
Key key,
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
bool locked) = 0;
Attrs value,
const StorePath & storePath) = 0;
virtual std::optional<std::pair<Attrs, StorePath>> lookup(
Store & store,
const Attrs & inAttrs) = 0;
struct Result
struct ResultWithStorePath : Result
{
bool expired = false;
Attrs infoAttrs;
StorePath storePath;
};
virtual std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) = 0;
/**
* Look up a store path in the cache. The returned store path will
* be valid, but it may be expired.
*/
virtual std::optional<ResultWithStorePath> lookupStorePath(
Key key,
Store & store) = 0;
/**
* Look up a store path in the cache. Return nothing if its TTL
* has exceeded `settings.tarballTTL`.
*/
virtual std::optional<ResultWithStorePath> lookupStorePathWithTTL(
Key key,
Store & store) = 0;
};
ref<Cache> getCache();

View file

@ -16,20 +16,18 @@ StorePath fetchToStore(
// FIXME: add an optimisation for the case where the accessor is
// a `PosixSourceAccessor` pointing to a store path.
std::optional<fetchers::Attrs> cacheKey;
std::optional<fetchers::Cache::Key> cacheKey;
if (!filter && path.accessor->fingerprint) {
cacheKey = fetchers::Attrs{
{"_what", "fetchToStore"},
{"store", store.storeDir},
cacheKey = fetchers::Cache::Key{"fetchToStore", {
{"name", std::string{name}},
{"fingerprint", *path.accessor->fingerprint},
{"method", std::string{method.render()}},
{"path", path.path.abs()}
};
if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) {
}};
if (auto res = fetchers::getCache()->lookupStorePath(*cacheKey, store)) {
debug("store path cache hit for '%s'", path);
return res->second;
return res->storePath;
}
} else
debug("source path '%s' is uncacheable", path);
@ -47,10 +45,9 @@ StorePath fetchToStore(
name, path, method, HashAlgorithm::SHA256, {}, filter2, repair);
if (cacheKey && mode == FetchMode::Copy)
fetchers::getCache()->add(store, *cacheKey, {}, storePath, true);
fetchers::getCache()->upsert(*cacheKey, store, {}, storePath);
return storePath;
}
}

View file

@ -452,7 +452,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
{
auto accessor = getAccessor(treeHash, false);
fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});
fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}};
if (auto res = fetchers::getCache()->lookup(cacheKey))
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256);

View file

@ -225,8 +225,8 @@ struct GitArchiveInputScheme : InputScheme
auto cache = getCache();
Attrs treeHashKey{{"_what", "gitRevToTreeHash"}, {"rev", rev->gitRev()}};
Attrs lastModifiedKey{{"_what", "gitRevToLastModified"}, {"rev", rev->gitRev()}};
Cache::Key treeHashKey{"gitRevToTreeHash", {{"rev", rev->gitRev()}}};
Cache::Key lastModifiedKey{"gitRevToLastModified", {{"rev", rev->gitRev()}}};
if (auto treeHashAttrs = cache->lookup(treeHashKey)) {
if (auto lastModifiedAttrs = cache->lookup(lastModifiedKey)) {

View file

@ -22,21 +22,20 @@ DownloadFileResult downloadFile(
{
// FIXME: check store
Attrs inAttrs({
{"type", "file"},
Cache::Key key{"file", {{
{"url", url},
{"name", name},
});
}}};
auto cached = getCache()->lookupExpired(*store, inAttrs);
auto cached = getCache()->lookupStorePath(key, *store);
auto useCached = [&]() -> DownloadFileResult
{
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
.etag = getStrAttr(cached->value, "etag"),
.effectiveUrl = getStrAttr(cached->value, "url"),
.immutableUrl = maybeGetStrAttr(cached->value, "immutableUrl"),
};
};
@ -46,7 +45,7 @@ DownloadFileResult downloadFile(
FileTransferRequest request(url);
request.headers = headers;
if (cached)
request.expectedETag = getStrAttr(cached->infoAttrs, "etag");
request.expectedETag = getStrAttr(cached->value, "etag");
FileTransferResult res;
try {
res = getFileTransfer()->download(request);
@ -92,14 +91,9 @@ DownloadFileResult downloadFile(
/* Cache metadata for all URLs in the redirect chain. */
for (auto & url : res.urls) {
inAttrs.insert_or_assign("url", url);
key.second.insert_or_assign("url", url);
infoAttrs.insert_or_assign("url", *res.urls.rbegin());
getCache()->add(
*store,
inAttrs,
infoAttrs,
*storePath,
false);
getCache()->upsert(key, *store, infoAttrs, *storePath);
}
return {
@ -114,12 +108,9 @@ DownloadTarballResult downloadTarball(
const std::string & url,
const Headers & headers)
{
Attrs inAttrs({
{"_what", "tarballCache"},
{"url", url},
});
Cache::Key cacheKey{"tarball", {{"url", url}}};
auto cached = getCache()->lookupExpired(inAttrs);
auto cached = getCache()->lookupExpired(cacheKey);
auto attrsToResult = [&](const Attrs & infoAttrs)
{
@ -132,19 +123,19 @@ DownloadTarballResult downloadTarball(
};
};
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->infoAttrs, "treeHash")))
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->value, "treeHash")))
cached.reset();
if (cached && !cached->expired)
/* We previously downloaded this tarball and it's younger than
`tarballTtl`, so no need to check the server. */
return attrsToResult(cached->infoAttrs);
return attrsToResult(cached->value);
auto _res = std::make_shared<Sync<FileTransferResult>>();
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest req(url);
req.expectedETag = cached ? getStrAttr(cached->infoAttrs, "etag") : "";
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
getFileTransfer()->download(std::move(req), sink,
[_res](FileTransferResult r)
{
@ -167,7 +158,7 @@ DownloadTarballResult downloadTarball(
if (res->cached) {
/* The server says that the previously downloaded version is
still current. */
infoAttrs = cached->infoAttrs;
infoAttrs = cached->value;
} else {
infoAttrs.insert_or_assign("etag", res->etag);
infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
@ -178,8 +169,8 @@ DownloadTarballResult downloadTarball(
/* Insert a cache entry for every URL in the redirect chain. */
for (auto & url : res->urls) {
inAttrs.insert_or_assign("url", url);
getCache()->upsert(inAttrs, infoAttrs);
cacheKey.second.insert_or_assign("url", url);
getCache()->upsert(cacheKey, infoAttrs);
}
// FIXME: add a cache entry for immutableUrl? That could allow

View file

@ -426,7 +426,7 @@ struct GitInputScheme : InputScheme
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
Attrs key{{"_what", "gitLastModified"}, {"rev", rev.gitRev()}};
Cache::Key key{"gitLastModified", {{"rev", rev.gitRev()}}};
auto cache = getCache();
@ -435,14 +435,14 @@ struct GitInputScheme : InputScheme
auto lastModified = GitRepo::openRepo(repoDir)->getLastModified(rev);
cache->upsert(key, Attrs{{"lastModified", lastModified}});
cache->upsert(key, {{"lastModified", lastModified}});
return lastModified;
}
uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
Attrs key{{"_what", "gitRevCount"}, {"rev", rev.gitRev()}};
Cache::Key key{"gitRevCount", {{"rev", rev.gitRev()}}};
auto cache = getCache();

View file

@ -222,22 +222,16 @@ struct MercurialInputScheme : InputScheme
if (!input.getRef()) input.attrs.insert_or_assign("ref", "default");
auto checkHashAlgorithm = [&](const std::optional<Hash> & hash)
auto revInfoKey = [&](const Hash & rev)
{
if (hash.has_value() && hash->algo != HashAlgorithm::SHA1)
throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", hash->to_string(HashFormat::Base16, true));
};
if (rev.algo != HashAlgorithm::SHA1)
throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", rev.to_string(HashFormat::Base16, true));
auto getLockedAttrs = [&]()
{
checkHashAlgorithm(input.getRev());
return Attrs({
{"type", "hg"},
return Cache::Key{"hgRev", {
{"store", store->storeDir},
{"name", name},
{"rev", input.getRev()->gitRev()},
});
{"rev", input.getRev()->gitRev()}
}};
};
auto makeResult = [&](const Attrs & infoAttrs, const StorePath & storePath) -> StorePath
@ -248,26 +242,21 @@ struct MercurialInputScheme : InputScheme
return storePath;
};
if (input.getRev()) {
if (auto res = getCache()->lookup(*store, getLockedAttrs()))
return makeResult(res->first, std::move(res->second));
/* Check the cache for the most recent rev for this URL/ref. */
Cache::Key refToRevKey{"hgRefToRev", {
{"url", actualUrl},
{"ref", *input.getRef()}
}};
if (!input.getRev()) {
if (auto res = getCache()->lookupWithTTL(refToRevKey))
input.attrs.insert_or_assign("rev", getRevAttr(*res, "rev").gitRev());
}
auto revOrRef = input.getRev() ? input.getRev()->gitRev() : *input.getRef();
Attrs unlockedAttrs({
{"type", "hg"},
{"name", name},
{"url", actualUrl},
{"ref", *input.getRef()},
});
if (auto res = getCache()->lookup(*store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), HashAlgorithm::SHA1);
if (!input.getRev() || input.getRev() == rev2) {
input.attrs.insert_or_assign("rev", rev2.gitRev());
return makeResult(res->first, std::move(res->second));
}
/* If we have a rev, check if we have a cached store path. */
if (auto rev = input.getRev()) {
if (auto res = getCache()->lookupStorePath(revInfoKey(*rev), *store))
return makeResult(res->value, res->storePath);
}
Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(HashAlgorithm::SHA256, actualUrl).to_string(HashFormat::Nix32, false));
@ -300,45 +289,42 @@ struct MercurialInputScheme : InputScheme
}
}
/* Fetch the remote rev or ref. */
auto tokens = tokenizeString<std::vector<std::string>>(
runHg({ "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" }));
runHg({
"log", "-R", cacheDir,
"-r", input.getRev() ? input.getRev()->gitRev() : *input.getRef(),
"--template", "{node} {rev} {branch}"
}));
assert(tokens.size() == 3);
input.attrs.insert_or_assign("rev", Hash::parseAny(tokens[0], HashAlgorithm::SHA1).gitRev());
auto rev = Hash::parseAny(tokens[0], HashAlgorithm::SHA1);
input.attrs.insert_or_assign("rev", rev.gitRev());
auto revCount = std::stoull(tokens[1]);
input.attrs.insert_or_assign("ref", tokens[2]);
if (auto res = getCache()->lookup(*store, getLockedAttrs()))
return makeResult(res->first, std::move(res->second));
/* Now that we have the rev, check the cache again for a
cached store path. */
if (auto res = getCache()->lookupStorePath(revInfoKey(rev), *store))
return makeResult(res->value, res->storePath);
Path tmpDir = createTempDir();
AutoDelete delTmpDir(tmpDir, true);
runHg({ "archive", "-R", cacheDir, "-r", input.getRev()->gitRev(), tmpDir });
runHg({ "archive", "-R", cacheDir, "-r", rev.gitRev(), tmpDir });
deletePath(tmpDir + "/.hg_archival.txt");
auto storePath = store->addToStore(name, {getFSSourceAccessor(), CanonPath(tmpDir)});
Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
{"revCount", (uint64_t) revCount},
});
if (!origRev)
getCache()->add(
*store,
unlockedAttrs,
infoAttrs,
storePath,
false);
getCache()->upsert(refToRevKey, {{"rev", rev.gitRev()}});
getCache()->add(
*store,
getLockedAttrs(),
infoAttrs,
storePath,
true);
getCache()->upsert(revInfoKey(rev), *store, infoAttrs, storePath);
return makeResult(infoAttrs, std::move(storePath));
}

View file

@ -101,6 +101,7 @@ path4=$(nix eval --impure --refresh --raw --expr "(builtins.fetchMercurial file:
[[ $path2 = $path4 ]]
echo paris > $repo/hello
# Passing a `name` argument should be reflected in the output path
path5=$(nix eval -vvvvv --impure --refresh --raw --expr "(builtins.fetchMercurial { url = \"file://$repo\"; name = \"foo\"; } ).outPath")
[[ $path5 =~ -foo$ ]]