mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2024-11-22 14:06:16 +02:00
Git fetcher: Improve submodule handling
Instead of making a complete copy of the repo, fetching the submodules, and writing the result to the store (which is all superexpensive), we now fetch the submodules recursively using the Git fetcher, and return a union accessor that "mounts" the accessors for the submodules on top of the root accessor.
This commit is contained in:
parent
ee36a44bf2
commit
d88106df24
6 changed files with 212 additions and 84 deletions
|
@ -1,11 +1,13 @@
|
|||
#include "git-utils.hh"
|
||||
#include "input-accessor.hh"
|
||||
#include "cache.hh"
|
||||
#include "finally.hh"
|
||||
|
||||
#include <boost/core/span.hpp>
|
||||
|
||||
#include <git2/blob.h>
|
||||
#include <git2/commit.h>
|
||||
#include <git2/config.h>
|
||||
#include <git2/describe.h>
|
||||
#include <git2/errors.h>
|
||||
#include <git2/global.h>
|
||||
|
@ -14,6 +16,7 @@
|
|||
#include <git2/remote.h>
|
||||
#include <git2/repository.h>
|
||||
#include <git2/status.h>
|
||||
#include <git2/submodule.h>
|
||||
#include <git2/tree.h>
|
||||
|
||||
#include <unordered_set>
|
||||
|
@ -63,6 +66,8 @@ typedef std::unique_ptr<git_reference, Deleter<git_reference_free>> Reference;
|
|||
typedef std::unique_ptr<git_describe_result, Deleter<git_describe_result_free>> DescribeResult;
|
||||
typedef std::unique_ptr<git_status_list, Deleter<git_status_list_free>> StatusList;
|
||||
typedef std::unique_ptr<git_remote, Deleter<git_remote_free>> Remote;
|
||||
typedef std::unique_ptr<git_config, Deleter<git_config_free>> GitConfig;
|
||||
typedef std::unique_ptr<git_config_iterator, Deleter<git_config_iterator_free>> ConfigIterator;
|
||||
|
||||
// A helper to ensure that we don't leak objects returned by libgit2.
|
||||
template<typename T>
|
||||
|
@ -256,6 +261,17 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::vector<Submodule> getSubmodules(const Hash & rev) override;
|
||||
|
||||
std::string resolveSubmoduleUrl(const std::string & url) override
|
||||
{
|
||||
git_buf buf = GIT_BUF_INIT;
|
||||
if (git_submodule_resolve_url(&buf, *this, url.c_str()))
|
||||
throw Error("resolving Git submodule URL '%s'", url);
|
||||
Finally cleanup = [&]() { git_buf_dispose(&buf); };
|
||||
return buf.ptr;
|
||||
}
|
||||
|
||||
bool hasObject(const Hash & oid_) override
|
||||
{
|
||||
auto oid = hashToOID(oid_);
|
||||
|
@ -400,6 +416,16 @@ struct GitInputAccessor : InputAccessor
|
|||
return readBlob(path, true);
|
||||
}
|
||||
|
||||
Hash getSubmoduleRev(const CanonPath & path)
|
||||
{
|
||||
auto entry = need(path);
|
||||
|
||||
if (git_tree_entry_type(entry) != GIT_OBJECT_COMMIT)
|
||||
throw Error("'%s' is not a submodule", showPath(path));
|
||||
|
||||
return toHash(*git_tree_entry_id(entry));
|
||||
}
|
||||
|
||||
std::map<CanonPath, TreeEntry> lookupCache;
|
||||
|
||||
/* Recursively look up 'path' relative to the root. */
|
||||
|
@ -495,4 +521,56 @@ ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev)
|
|||
return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev);
|
||||
}
|
||||
|
||||
std::vector<GitRepoImpl::Submodule> GitRepoImpl::getSubmodules(const Hash & rev)
|
||||
{
|
||||
/* Read the .gitmodules files from this revision. */
|
||||
CanonPath modulesFile(".gitmodules");
|
||||
|
||||
auto accessor = getAccessor(rev);
|
||||
if (!accessor->pathExists(modulesFile)) return {};
|
||||
|
||||
/* Parse it. */
|
||||
auto configS = accessor->readFile(modulesFile);
|
||||
|
||||
auto [fdTemp, pathTemp] = createTempFile("nix-git-submodules");
|
||||
writeFull(fdTemp.get(), configS);
|
||||
|
||||
GitConfig config;
|
||||
if (git_config_open_ondisk(Setter(config), pathTemp.c_str()))
|
||||
throw Error("parsing .gitmodules file: %s", git_error_last()->message);
|
||||
|
||||
ConfigIterator it;
|
||||
if (git_config_iterator_glob_new(Setter(it), config.get(), "^submodule\\..*\\.(path|url|branch)$"))
|
||||
throw Error("iterating over .gitmodules: %s", git_error_last()->message);
|
||||
|
||||
std::map<std::string, std::string> entries;
|
||||
|
||||
while (true) {
|
||||
git_config_entry * entry = nullptr;
|
||||
if (auto err = git_config_next(&entry, it.get())) {
|
||||
if (err == GIT_ITEROVER) break;
|
||||
throw Error("iterating over .gitmodules: %s", git_error_last()->message);
|
||||
}
|
||||
entries.emplace(entry->name + 10, entry->value);
|
||||
}
|
||||
|
||||
std::vector<Submodule> result;
|
||||
|
||||
for (auto & [key, value] : entries) {
|
||||
if (!hasSuffix(key, ".path")) continue;
|
||||
std::string key2(key, 0, key.size() - 5);
|
||||
auto path = CanonPath(value);
|
||||
auto rev = accessor.dynamic_pointer_cast<GitInputAccessor>()->getSubmoduleRev(path);
|
||||
result.push_back(Submodule {
|
||||
.path = path,
|
||||
.url = entries[key2 + ".url"],
|
||||
.branch = entries[key2 + ".branch"],
|
||||
.rev = rev,
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -38,6 +38,18 @@ struct GitRepo
|
|||
/* Get the ref that HEAD points to. */
|
||||
virtual std::optional<std::string> getWorkdirRef() = 0;
|
||||
|
||||
struct Submodule
|
||||
{
|
||||
CanonPath path;
|
||||
std::string url;
|
||||
std::string branch;
|
||||
Hash rev;
|
||||
};
|
||||
|
||||
virtual std::vector<Submodule> getSubmodules(const Hash & rev) = 0;
|
||||
|
||||
virtual std::string resolveSubmoduleUrl(const std::string & url) = 0;
|
||||
|
||||
struct TarballInfo
|
||||
{
|
||||
Hash treeHash;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "util.hh"
|
||||
#include "git.hh"
|
||||
#include "fs-input-accessor.hh"
|
||||
#include "union-input-accessor.hh"
|
||||
#include "git-utils.hh"
|
||||
|
||||
#include "fetch-settings.hh"
|
||||
|
@ -134,11 +135,6 @@ std::optional<std::string> readHeadCached(const std::string & actualUrl)
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
bool isNotDotGitDirectory(const Path & path)
|
||||
{
|
||||
return baseNameOf(path) != ".git";
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
struct GitInputScheme : InputScheme
|
||||
|
@ -413,7 +409,7 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
std::string name = input.getName();
|
||||
|
||||
auto makeResult2 = [&](const Attrs & infoAttrs, ref<InputAccessor> accessor) -> std::pair<ref<InputAccessor>, Input>
|
||||
auto makeResult = [&](const Attrs & infoAttrs, ref<InputAccessor> accessor) -> std::pair<ref<InputAccessor>, Input>
|
||||
{
|
||||
assert(input.getRev());
|
||||
assert(!origRev || origRev == input.getRev());
|
||||
|
@ -424,18 +420,6 @@ struct GitInputScheme : InputScheme
|
|||
return {accessor, std::move(input)};
|
||||
};
|
||||
|
||||
auto makeResult = [&](const Attrs & infoAttrs, const StorePath & storePath) -> std::pair<ref<InputAccessor>, Input>
|
||||
{
|
||||
// FIXME: remove?
|
||||
//input.attrs.erase("narHash");
|
||||
auto narHash = store->queryPathInfo(storePath)->narHash;
|
||||
input.attrs.insert_or_assign("narHash", narHash.to_string(HashFormat::SRI, true));
|
||||
|
||||
auto accessor = makeStorePathAccessor(store, storePath, makeNotAllowedError(repoInfo.url));
|
||||
|
||||
return makeResult2(infoAttrs, accessor);
|
||||
};
|
||||
|
||||
auto originalRef = input.getRef();
|
||||
auto ref = originalRef ? *originalRef : getDefaultRef(repoInfo);
|
||||
input.attrs.insert_or_assign("ref", ref);
|
||||
|
@ -542,66 +526,39 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.url);
|
||||
|
||||
if (!repoInfo.submodules) {
|
||||
auto accessor = GitRepo::openRepo(CanonPath(repoDir))->getAccessor(rev);
|
||||
return makeResult2(infoAttrs, accessor);
|
||||
auto repo = GitRepo::openRepo(CanonPath(repoDir));
|
||||
|
||||
auto accessor = repo->getAccessor(rev);
|
||||
|
||||
/* If the repo has submodules, fetch them and return a union
|
||||
input accessor consisting of the accessor for the top-level
|
||||
repo and the accessors for the submodules. */
|
||||
if (repoInfo.submodules) {
|
||||
std::map<CanonPath, nix::ref<InputAccessor>> mounts;
|
||||
|
||||
for (auto & submodule : repo->getSubmodules(rev)) {
|
||||
auto resolved = repo->resolveSubmoduleUrl(submodule.url);
|
||||
debug("Git submodule %s: %s %s %s -> %s",
|
||||
submodule.path, submodule.url, submodule.branch, submodule.rev.gitRev(), resolved);
|
||||
fetchers::Attrs attrs;
|
||||
attrs.insert_or_assign("type", "git");
|
||||
attrs.insert_or_assign("url", resolved);
|
||||
if (submodule.branch != "")
|
||||
attrs.insert_or_assign("ref", submodule.branch);
|
||||
attrs.insert_or_assign("rev", submodule.rev.gitRev());
|
||||
auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs));
|
||||
auto [submoduleAccessor, submoduleInput2] =
|
||||
submoduleInput.scheme->getAccessor(store, submoduleInput);
|
||||
mounts.insert_or_assign(submodule.path, submoduleAccessor);
|
||||
}
|
||||
|
||||
else {
|
||||
// FIXME: use libgit2
|
||||
Path tmpDir = createTempDir();
|
||||
AutoDelete delTmpDir(tmpDir, true);
|
||||
PathFilter filter = defaultPathFilter;
|
||||
|
||||
Activity act(*logger, lvlChatty, actUnknown, fmt("copying Git tree '%s' to the store", input.to_string()));
|
||||
|
||||
Path tmpGitDir = createTempDir();
|
||||
AutoDelete delTmpGitDir(tmpGitDir, true);
|
||||
|
||||
runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir });
|
||||
|
||||
{
|
||||
// TODO: repoDir might lack the ref (it only checks if rev
|
||||
// exists, see FIXME above) so use a big hammer and fetch
|
||||
// everything to ensure we get the rev.
|
||||
Activity act(*logger, lvlTalkative, actUnknown, fmt("making temporary clone of '%s'", repoDir));
|
||||
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
|
||||
"--update-head-ok", "--", repoDir, "refs/*:refs/*" }, {}, true);
|
||||
if (!mounts.empty()) {
|
||||
mounts.insert_or_assign(CanonPath::root, accessor);
|
||||
accessor = makeUnionInputAccessor(std::move(mounts));
|
||||
}
|
||||
}
|
||||
|
||||
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", rev.gitRev() });
|
||||
|
||||
/* Ensure that we use the correct origin for fetching
|
||||
submodules. This matters for submodules with relative
|
||||
URLs. */
|
||||
if (repoInfo.isLocal) {
|
||||
writeFile(tmpGitDir + "/config", readFile(repoDir + "/" + repoInfo.gitDir + "/config"));
|
||||
|
||||
/* Restore the config.bare setting we may have just
|
||||
copied erroneously from the user's repo. */
|
||||
runProgram("git", true, { "-C", tmpDir, "config", "core.bare", "false" });
|
||||
} else
|
||||
runProgram("git", true, { "-C", tmpDir, "config", "remote.origin.url", repoInfo.url });
|
||||
|
||||
/* As an optimisation, copy the modules directory of the
|
||||
source repo if it exists. */
|
||||
auto modulesPath = repoDir + "/" + repoInfo.gitDir + "/modules";
|
||||
if (pathExists(modulesPath)) {
|
||||
Activity act(*logger, lvlTalkative, actUnknown, fmt("copying submodules of '%s'", repoInfo.url));
|
||||
runProgram("cp", true, { "-R", "--", modulesPath, tmpGitDir + "/modules" });
|
||||
}
|
||||
|
||||
{
|
||||
Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching submodules of '%s'", repoInfo.url));
|
||||
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }, {}, true);
|
||||
}
|
||||
|
||||
filter = isNotDotGitDirectory;
|
||||
|
||||
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);
|
||||
|
||||
return makeResult(infoAttrs, std::move(storePath));
|
||||
}
|
||||
return makeResult(infoAttrs, accessor);
|
||||
}
|
||||
|
||||
std::pair<ref<InputAccessor>, Input> getAccessorFromWorkdir(
|
||||
|
|
80
src/libfetchers/union-input-accessor.cc
Normal file
80
src/libfetchers/union-input-accessor.cc
Normal file
|
@ -0,0 +1,80 @@
|
|||
#include "union-input-accessor.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
struct UnionInputAccessor : InputAccessor
|
||||
{
|
||||
std::map<CanonPath, ref<InputAccessor>> mounts;
|
||||
|
||||
UnionInputAccessor(std::map<CanonPath, ref<InputAccessor>> _mounts)
|
||||
: mounts(std::move(_mounts))
|
||||
{
|
||||
// Currently we require a root filesystem. This could be relaxed.
|
||||
assert(mounts.contains(CanonPath::root));
|
||||
|
||||
// FIXME: should check that every mount point exists. Or we
|
||||
// could return dummy parent directories automatically.
|
||||
}
|
||||
|
||||
std::string readFile(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->readFile(subpath);
|
||||
}
|
||||
|
||||
bool pathExists(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->pathExists(subpath);
|
||||
}
|
||||
|
||||
Stat lstat(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->lstat(subpath);
|
||||
}
|
||||
|
||||
DirEntries readDirectory(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->readDirectory(subpath);
|
||||
}
|
||||
|
||||
std::string readLink(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->readLink(subpath);
|
||||
}
|
||||
|
||||
std::string showPath(const CanonPath & path) override
|
||||
{
|
||||
auto [accessor, subpath] = resolve(path);
|
||||
return accessor->showPath(subpath);
|
||||
}
|
||||
|
||||
std::pair<ref<InputAccessor>, CanonPath> resolve(CanonPath path)
|
||||
{
|
||||
// Find the nearest parent of `path` that is a mount point.
|
||||
std::vector<std::string> ss;
|
||||
while (true) {
|
||||
auto i = mounts.find(path);
|
||||
if (i != mounts.end()) {
|
||||
auto subpath = CanonPath::root;
|
||||
for (auto j = ss.rbegin(); j != ss.rend(); ++j)
|
||||
subpath.push(*j);
|
||||
return {i->second, std::move(subpath)};
|
||||
}
|
||||
|
||||
assert(!path.isRoot());
|
||||
ss.push_back(std::string(*path.baseName()));
|
||||
path.pop();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
ref<InputAccessor> makeUnionInputAccessor(std::map<CanonPath, ref<InputAccessor>> mounts)
|
||||
{
|
||||
return make_ref<UnionInputAccessor>(std::move(mounts));
|
||||
}
|
||||
|
||||
}
|
9
src/libfetchers/union-input-accessor.hh
Normal file
9
src/libfetchers/union-input-accessor.hh
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "input-accessor.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
ref<InputAccessor> makeUnionInputAccessor(std::map<CanonPath, ref<InputAccessor>> mounts);
|
||||
|
||||
}
|
|
@ -118,11 +118,3 @@ cloneRepo=$TEST_ROOT/a/b/gitSubmodulesClone # NB /a/b to make the relative path
|
|||
git clone $rootRepo $cloneRepo
|
||||
pathIndirect=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath")
|
||||
[[ $pathIndirect = $pathWithRelative ]]
|
||||
|
||||
# Test that if the clone has the submodule already, we're not fetching
|
||||
# it again.
|
||||
git -C $cloneRepo submodule update --init
|
||||
rm $TEST_HOME/.cache/nix/fetcher-cache*
|
||||
rm -rf $subRepo
|
||||
pathSubmoduleGone=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath")
|
||||
[[ $pathSubmoduleGone = $pathWithRelative ]]
|
||||
|
|
Loading…
Reference in a new issue