Cache git revCount / lastModified attributes

revCount in particular is very slow to compute, since it requires
querying the entire history.
This commit is contained in:
Eelco Dolstra 2022-08-11 12:37:10 +02:00
parent 3b45475f75
commit c0d33087c8
3 changed files with 100 additions and 22 deletions

View file

@ -17,6 +17,12 @@ create table if not exists Cache (
timestamp integer not null, timestamp integer not null,
primary key (input) primary key (input)
); );
create table if not exists Facts (
name text not null,
value text not null,
primary key (name)
);
)sql"; )sql";
struct CacheImpl : Cache struct CacheImpl : Cache
@ -24,7 +30,7 @@ struct CacheImpl : Cache
struct State struct State
{ {
SQLite db; SQLite db;
SQLiteStmt add, lookup; SQLiteStmt add, lookup, upsertFact, queryFact;
}; };
Sync<State> _state; Sync<State> _state;
@ -33,7 +39,7 @@ struct CacheImpl : Cache
{ {
auto state(_state.lock()); auto state(_state.lock());
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite"; auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
createDirs(dirOf(dbPath)); createDirs(dirOf(dbPath));
state->db = SQLite(dbPath); state->db = SQLite(dbPath);
@ -45,6 +51,12 @@ struct CacheImpl : Cache
state->lookup.create(state->db, state->lookup.create(state->db,
"select info, path, immutable, timestamp from Cache where input = ?"); "select info, path, immutable, timestamp from Cache where input = ?");
state->upsertFact.create(state->db,
"insert or replace into Facts(name, value) values (?, ?)");
state->queryFact.create(state->db,
"select value from Facts where name = ?");
} }
void add( void add(
@ -110,6 +122,25 @@ struct CacheImpl : Cache
.storePath = std::move(storePath) .storePath = std::move(storePath)
}; };
} }
void upsertFact(
std::string_view key,
std::string_view value) override
{
_state.lock()->upsertFact.use()
(key)
(value).exec();
}
/* Fetch the value recorded under `key` in the Facts table.
   Returns an empty optional when the query yields no row. */
std::optional<std::string> queryFact(std::string_view key) override
{
    auto state(_state.lock());

    auto row(state->queryFact.use()(key));
    if (row.next())
        return row.getStr(0);

    return std::nullopt;
}
}; };
ref<Cache> getCache() ref<Cache> getCache()

View file

@ -30,6 +30,14 @@ struct Cache
virtual std::optional<Result> lookupExpired( virtual std::optional<Result> lookupExpired(
ref<Store> store, ref<Store> store,
const Attrs & inAttrs) = 0; const Attrs & inAttrs) = 0;
/* A simple key/value store for immutable facts such as the
   revcount corresponding to a rev. */

/* Record `value` under `key`. CacheImpl implements this with an
   SQL `insert or replace`, so an existing value for `key` is
   overwritten. */
virtual void upsertFact(
std::string_view key,
std::string_view value) = 0;

/* Look up the value recorded under `key`. CacheImpl returns an
   empty optional when no row matches. */
virtual std::optional<std::string> queryFact(std::string_view key) = 0;
}; };
ref<Cache> getCache(); ref<Cache> getCache();

View file

@ -401,11 +401,15 @@ struct GitInputScheme : InputScheme
return res; return res;
} }
void updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const Hash updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const
{ {
if (!input.getRev()) if (auto r = input.getRev())
input.attrs.insert_or_assign("rev", return *r;
Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1).gitRev()); else {
auto rev = Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1);
input.attrs.insert_or_assign("rev", rev.gitRev());
return rev;
}
} }
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const std::string & ref) const uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const std::string & ref) const
@ -418,15 +422,46 @@ struct GitInputScheme : InputScheme
: 0; : 0;
} }
/* Cached variant of getLastModified() for a known revision.

   Returns 0 when the repository has no HEAD. Otherwise the fact
   cache is consulted under the key "git-<rev>-last-modified"; a hit
   that parses as an unsigned integer is returned directly. On a
   miss (or if the stored string does not parse) the value is
   computed through the string overload of getLastModified(), written
   back to the cache, and returned. */
uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{
    if (!repoInfo.hasHead)
        return 0;

    const auto revStr = rev.gitRev();
    const auto factName = fmt("git-%s-last-modified", revStr);

    auto cache = getCache();

    if (auto cached = cache->queryFact(factName)) {
        auto parsed = string2Int<uint64_t>(*cached);
        if (parsed)
            return *parsed;
        // Unparseable cache entry: fall through and recompute.
    }

    auto timestamp = getLastModified(repoInfo, repoDir, revStr);
    cache->upsertFact(factName, std::to_string(timestamp));
    return timestamp;
}
uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
{ {
// FIXME: cache this. if (!repoInfo.hasHead) return 0;
return
repoInfo.hasHead auto key = fmt("git-%s-revcount", rev.gitRev());
? std::stoull(
runProgram("git", true, auto cache = getCache();
{ "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }))
: 0; if (auto revCountS = cache->queryFact(key)) {
if (auto revCount = string2Int<uint64_t>(*revCountS))
return *revCount;
}
auto revCount = std::stoull(
runProgram("git", true,
{ "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }));
cache->upsertFact(key, std::to_string(revCount));
return revCount;
} }
std::string getDefaultRef(const RepoInfo & repoInfo) const std::string getDefaultRef(const RepoInfo & repoInfo) const
@ -664,7 +699,7 @@ struct GitInputScheme : InputScheme
Attrs infoAttrs({ Attrs infoAttrs({
{"rev", rev.gitRev()}, {"rev", rev.gitRev()},
{"lastModified", getLastModified(repoInfo, repoDir, rev.gitRev())}, {"lastModified", getLastModified(repoInfo, repoDir, rev)},
}); });
if (!repoInfo.shallow) if (!repoInfo.shallow)
@ -717,18 +752,22 @@ struct GitInputScheme : InputScheme
input.attrs.insert_or_assign("ref", ref); input.attrs.insert_or_assign("ref", ref);
if (!repoInfo.isDirty) { if (!repoInfo.isDirty) {
updateRev(input, repoInfo, ref); auto rev = updateRev(input, repoInfo, ref);
input.attrs.insert_or_assign( input.attrs.insert_or_assign(
"revCount", "revCount",
getRevCount(repoInfo, repoInfo.url, *input.getRev())); getRevCount(repoInfo, repoInfo.url, rev));
}
// FIXME: maybe we should use the timestamp of the last input.attrs.insert_or_assign(
// modified dirty file? "lastModified",
input.attrs.insert_or_assign( getLastModified(repoInfo, repoInfo.url, rev));
"lastModified", } else {
getLastModified(repoInfo, repoInfo.url, ref)); // FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
getLastModified(repoInfo, repoInfo.url, ref));
}
return {makeFSInputAccessor(CanonPath(repoInfo.url), listFiles(repoInfo), std::move(makeNotAllowedError)), input}; return {makeFSInputAccessor(CanonPath(repoInfo.url), listFiles(repoInfo), std::move(makeNotAllowedError)), input};
} }