Cache git revCount / lastModified attributes

revCount in particular is very slow to compute, since it requires
walking the entire commit history.
This commit is contained in:
Eelco Dolstra 2022-08-11 12:37:10 +02:00
parent 3b45475f75
commit c0d33087c8
3 changed files with 100 additions and 22 deletions

View file

@ -17,6 +17,12 @@ create table if not exists Cache (
timestamp integer not null,
primary key (input)
);
create table if not exists Facts (
name text not null,
value text not null,
primary key (name)
);
)sql";
struct CacheImpl : Cache
@ -24,7 +30,7 @@ struct CacheImpl : Cache
struct State
{
SQLite db;
SQLiteStmt add, lookup;
SQLiteStmt add, lookup, upsertFact, queryFact;
};
Sync<State> _state;
@ -33,7 +39,7 @@ struct CacheImpl : Cache
{
auto state(_state.lock());
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite";
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
createDirs(dirOf(dbPath));
state->db = SQLite(dbPath);
@ -45,6 +51,12 @@ struct CacheImpl : Cache
state->lookup.create(state->db,
"select info, path, immutable, timestamp from Cache where input = ?");
state->upsertFact.create(state->db,
"insert or replace into Facts(name, value) values (?, ?)");
state->queryFact.create(state->db,
"select value from Facts where name = ?");
}
void add(
@ -110,6 +122,25 @@ struct CacheImpl : Cache
.storePath = std::move(storePath)
};
}
void upsertFact(
std::string_view key,
std::string_view value) override
{
_state.lock()->upsertFact.use()
(key)
(value).exec();
}
/* Fetch the value recorded for `key` in the Facts table, or an
   empty optional when the query yields no row. */
std::optional<std::string> queryFact(std::string_view key) override
{
    auto state(_state.lock());

    auto query(state->queryFact.use()(key));
    if (query.next())
        return query.getStr(0);

    return std::nullopt;
}
};
ref<Cache> getCache()

View file

@ -30,6 +30,14 @@ struct Cache
virtual std::optional<Result> lookupExpired(
ref<Store> store,
const Attrs & inAttrs) = 0;
/* A simple key/value store for immutable facts such as the
revcount corresponding to a rev.

upsertFact() records `value` as the fact named `key`. As the
"upsert" in the name indicates, a value already stored under
`key` is replaced rather than duplicated. */
virtual void upsertFact(
std::string_view key,
std::string_view value) = 0;
/* Look up the fact named `key`. Yields the stored value, or an
empty optional when nothing has been recorded under `key`. */
virtual std::optional<std::string> queryFact(std::string_view key) = 0;
};
ref<Cache> getCache();

View file

@ -401,11 +401,15 @@ struct GitInputScheme : InputScheme
return res;
}
void updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const
Hash updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const
{
if (!input.getRev())
input.attrs.insert_or_assign("rev",
Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1).gitRev());
if (auto r = input.getRev())
return *r;
else {
auto rev = Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1);
input.attrs.insert_or_assign("rev", rev.gitRev());
return rev;
}
}
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const std::string & ref) const
@ -418,15 +422,46 @@ struct GitInputScheme : InputScheme
: 0;
}
/* Return the last-modified timestamp for `rev`, consulting the
   fetcher cache's fact store first. Repositories without a HEAD
   yield 0 and never touch the cache. On a cache miss (or when the
   cached string does not parse as an integer) the value is
   recomputed via the string-based overload and written back. */
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
    if (!repoInfo.hasHead) return 0;

    const auto factName = fmt("git-%s-last-modified", rev.gitRev());
    auto cache = getCache();

    if (auto cached = cache->queryFact(factName))
        if (auto parsed = string2Int<uint64_t>(*cached))
            return *parsed;

    // Not cached yet (or unparsable): ask git, then remember the answer.
    const auto computed = getLastModified(repoInfo, repoDir, rev.gitRev());
    cache->upsertFact(factName, std::to_string(computed));
    return computed;
}
uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
// FIXME: cache this.
return
repoInfo.hasHead
? std::stoull(
if (!repoInfo.hasHead) return 0;
auto key = fmt("git-%s-revcount", rev.gitRev());
auto cache = getCache();
if (auto revCountS = cache->queryFact(key)) {
if (auto revCount = string2Int<uint64_t>(*revCountS))
return *revCount;
}
auto revCount = std::stoull(
runProgram("git", true,
{ "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }))
: 0;
{ "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }));
cache->upsertFact(key, std::to_string(revCount));
return revCount;
}
std::string getDefaultRef(const RepoInfo & repoInfo) const
@ -664,7 +699,7 @@ struct GitInputScheme : InputScheme
Attrs infoAttrs({
{"rev", rev.gitRev()},
{"lastModified", getLastModified(repoInfo, repoDir, rev.gitRev())},
{"lastModified", getLastModified(repoInfo, repoDir, rev)},
});
if (!repoInfo.shallow)
@ -717,18 +752,22 @@ struct GitInputScheme : InputScheme
input.attrs.insert_or_assign("ref", ref);
if (!repoInfo.isDirty) {
updateRev(input, repoInfo, ref);
auto rev = updateRev(input, repoInfo, ref);
input.attrs.insert_or_assign(
"revCount",
getRevCount(repoInfo, repoInfo.url, *input.getRev()));
}
getRevCount(repoInfo, repoInfo.url, rev));
input.attrs.insert_or_assign(
"lastModified",
getLastModified(repoInfo, repoInfo.url, rev));
} else {
// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
getLastModified(repoInfo, repoInfo.url, ref));
}
return {makeFSInputAccessor(CanonPath(repoInfo.url), listFiles(repoInfo), std::move(makeNotAllowedError)), input};
}