From c0d33087c8b39aba4dbb797be98f05a52c4358fa Mon Sep 17 00:00:00 2001
From: Eelco Dolstra
Date: Thu, 11 Aug 2022 12:37:10 +0200
Subject: [PATCH] Cache git revCount / lastModified attributes

Especially revCount is very slow to compute since it requires
querying the entire history.
---
Review notes (below the "---" separator, so not part of the commit message):

* cache.cc / cache.hh add a "Facts" table (name -> value, both text) plus
  upsertFact() / queryFact() on Cache, and change the cache file name from
  fetcher-cache-v1.sqlite to fetcher-cache-v2.sqlite.
* git.cc: getRevCount() and the new getLastModified(..., const Hash & rev)
  overload return 0 when repoInfo.hasHead is false; otherwise they look up
  "git-<rev>-revcount" / "git-<rev>-last-modified" via queryFact() and only
  run git (and then upsertFact() the result) when no parsable value is stored.
* updateRev() now returns the Hash it resolved, so callers no longer need to
  re-read it through input.getRev().
* NOTE(review): this copy was restored from a whitespace-collapsed extract.
  Line breaks and indentation were rebuilt to match the hunk headers and the
  diffstat; verify context-line whitespace against the tree before applying.
* NOTE(review): text inside angle brackets had been stripped. Sync<State>,
  std::optional<std::string>, ref<Cache> and string2Int<uint64_t> follow from
  how the values are used in this patch; std::optional<Result> and ref<Store>
  on the lookupExpired() context lines are presumed -- confirm against
  cache.hh.
* NOTE(review): the author e-mail address on the "From:" line was lost the
  same way and has not been guessed; re-add it before using git am.

 src/libfetchers/cache.cc | 35 +++++++++++++++++-
 src/libfetchers/cache.hh |  8 ++++
 src/libfetchers/git.cc   | 79 ++++++++++++++++++++++++++++++----------
 3 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/src/libfetchers/cache.cc b/src/libfetchers/cache.cc
index 0c8ecac9d..58c2142e5 100644
--- a/src/libfetchers/cache.cc
+++ b/src/libfetchers/cache.cc
@@ -17,6 +17,12 @@ create table if not exists Cache (
     timestamp integer not null,
     primary key (input)
 );
+
+create table if not exists Facts (
+    name text not null,
+    value text not null,
+    primary key (name)
+);
 )sql";
 
 struct CacheImpl : Cache
@@ -24,7 +30,7 @@ struct CacheImpl : Cache
     struct State
     {
         SQLite db;
-        SQLiteStmt add, lookup;
+        SQLiteStmt add, lookup, upsertFact, queryFact;
     };
 
     Sync<State> _state;
@@ -33,7 +39,7 @@ struct CacheImpl : Cache
     {
         auto state(_state.lock());
 
-        auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite";
+        auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
         createDirs(dirOf(dbPath));
 
         state->db = SQLite(dbPath);
@@ -45,6 +51,12 @@ struct CacheImpl : Cache
 
         state->lookup.create(state->db,
             "select info, path, immutable, timestamp from Cache where input = ?");
+
+        state->upsertFact.create(state->db,
+            "insert or replace into Facts(name, value) values (?, ?)");
+
+        state->queryFact.create(state->db,
+            "select value from Facts where name = ?");
     }
 
     void add(
@@ -110,6 +122,25 @@ struct CacheImpl : Cache
             .storePath = std::move(storePath)
         };
     }
+
+    void upsertFact(
+        std::string_view key,
+        std::string_view value) override
+    {
+        _state.lock()->upsertFact.use()
+            (key)
+            (value).exec();
+    }
+
+    std::optional<std::string> queryFact(std::string_view key) override
+    {
+        auto state(_state.lock());
+
+        auto stmt(state->queryFact.use()(key));
+        if (!stmt.next()) return {};
+
+        return stmt.getStr(0);
+    }
 };
 
 ref<Cache> getCache()
diff --git a/src/libfetchers/cache.hh b/src/libfetchers/cache.hh
index 3a81030dd..2c46d1d15 100644
--- a/src/libfetchers/cache.hh
+++ b/src/libfetchers/cache.hh
@@ -30,6 +30,14 @@ struct Cache
     virtual std::optional<Result> lookupExpired(
         ref<Store> store,
         const Attrs & inAttrs) = 0;
+
+    /* A simple key/value store for immutable facts such as the
+       revcount corresponding to a rev. */
+    virtual void upsertFact(
+        std::string_view key,
+        std::string_view value) = 0;
+
+    virtual std::optional<std::string> queryFact(std::string_view key) = 0;
 };
 
 ref<Cache> getCache();
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc
index 181507b18..0c54e13c9 100644
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@@ -401,11 +401,15 @@ struct GitInputScheme : InputScheme
         return res;
     }
 
-    void updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const
+    Hash updateRev(Input & input, const RepoInfo & repoInfo, const std::string & ref) const
     {
-        if (!input.getRev())
-            input.attrs.insert_or_assign("rev",
-                Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1).gitRev());
+        if (auto r = input.getRev())
+            return *r;
+        else {
+            auto rev = Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "rev-parse", ref })), htSHA1);
+            input.attrs.insert_or_assign("rev", rev.gitRev());
+            return rev;
+        }
     }
 
     uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const std::string & ref) const
@@ -418,15 +422,46 @@ struct GitInputScheme : InputScheme
             : 0;
     }
 
+    uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
+    {
+        if (!repoInfo.hasHead) return 0;
+
+        auto key = fmt("git-%s-last-modified", rev.gitRev());
+
+        auto cache = getCache();
+
+        if (auto lastModifiedS = cache->queryFact(key)) {
+            if (auto lastModified = string2Int<uint64_t>(*lastModifiedS))
+                return *lastModified;
+        }
+
+        auto lastModified = getLastModified(repoInfo, repoDir, rev.gitRev());
+
+        cache->upsertFact(key, std::to_string(lastModified));
+
+        return lastModified;
+    }
+
     uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const
     {
-        // FIXME: cache this.
-        return
-            repoInfo.hasHead
-            ? std::stoull(
-                runProgram("git", true,
-                    { "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }))
-            : 0;
+        if (!repoInfo.hasHead) return 0;
+
+        auto key = fmt("git-%s-revcount", rev.gitRev());
+
+        auto cache = getCache();
+
+        if (auto revCountS = cache->queryFact(key)) {
+            if (auto revCount = string2Int<uint64_t>(*revCountS))
+                return *revCount;
+        }
+
+        auto revCount = std::stoull(
+            runProgram("git", true,
+                { "-C", repoDir, "--git-dir", repoInfo.gitDir, "rev-list", "--count", rev.gitRev() }));
+
+        cache->upsertFact(key, std::to_string(revCount));
+
+        return revCount;
     }
 
     std::string getDefaultRef(const RepoInfo & repoInfo) const
@@ -664,7 +699,7 @@ struct GitInputScheme : InputScheme
 
         Attrs infoAttrs({
             {"rev", rev.gitRev()},
-            {"lastModified", getLastModified(repoInfo, repoDir, rev.gitRev())},
+            {"lastModified", getLastModified(repoInfo, repoDir, rev)},
         });
 
         if (!repoInfo.shallow)
@@ -717,18 +752,22 @@ struct GitInputScheme : InputScheme
         input.attrs.insert_or_assign("ref", ref);
 
         if (!repoInfo.isDirty) {
-            updateRev(input, repoInfo, ref);
+            auto rev = updateRev(input, repoInfo, ref);
 
             input.attrs.insert_or_assign(
                 "revCount",
-                getRevCount(repoInfo, repoInfo.url, *input.getRev()));
-        }
+                getRevCount(repoInfo, repoInfo.url, rev));
 
-        // FIXME: maybe we should use the timestamp of the last
-        // modified dirty file?
-        input.attrs.insert_or_assign(
-            "lastModified",
-            getLastModified(repoInfo, repoInfo.url, ref));
+            input.attrs.insert_or_assign(
+                "lastModified",
+                getLastModified(repoInfo, repoInfo.url, rev));
+        } else {
+            // FIXME: maybe we should use the timestamp of the last
+            // modified dirty file?
+            input.attrs.insert_or_assign(
+                "lastModified",
+                getLastModified(repoInfo, repoInfo.url, ref));
+        }
 
         return {makeFSInputAccessor(CanonPath(repoInfo.url), listFiles(repoInfo), std::move(makeNotAllowedError)), input};
     }