GitInputScheme: Use FSInputAccessor for local trees

This commit is contained in:
Eelco Dolstra 2022-02-16 21:00:54 +01:00
parent 38c665847a
commit 631ae8df6d

View file

@ -122,11 +122,11 @@ struct GitInputScheme : InputScheme
void clone(const Input & input, const Path & destDir) override void clone(const Input & input, const Path & destDir) override
{ {
auto [isLocal, actualUrl] = getActualUrl(input); auto repoInfo = getRepoInfo(input);
Strings args = {"clone"}; Strings args = {"clone"};
args.push_back(actualUrl); args.push_back(repoInfo.url);
if (auto ref = input.getRef()) { if (auto ref = input.getRef()) {
args.push_back("--branch"); args.push_back("--branch");
@ -161,8 +161,51 @@ struct GitInputScheme : InputScheme
{ "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg });
} }
std::pair<bool, std::string> getActualUrl(const Input & input) const struct RepoInfo
{ {
bool shallow = false;
bool submodules = false;
bool allRefs = false;
std::string cacheType;
/* Whether this is a local, non-bare repository. */
bool isLocal = false;
/* Whether this is a local, non-bare, dirty repository. */
bool isDirty = false;
/* Whether this repository has any commits. */
bool hasHead = true;
/* URL of the repo, or its path if isLocal. */
std::string url;
void checkDirty()
{
if (isDirty) {
if (!fetchSettings.allowDirty)
throw Error("Git tree '%s' is dirty", url);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", url);
}
}
};
RepoInfo getRepoInfo(const Input & input)
{
RepoInfo repoInfo;
repoInfo.shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false);
repoInfo.submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
repoInfo.allRefs = maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
repoInfo.cacheType = "git";
if (repoInfo.shallow) repoInfo.cacheType += "-shallow";
if (repoInfo.submodules) repoInfo.cacheType += "-submodules";
if (repoInfo.allRefs) repoInfo.cacheType += "-all-refs";
// file:// URIs are normally not cloned (but otherwise treated the // file:// URIs are normally not cloned (but otherwise treated the
// same as remote URIs, i.e. we don't use the working tree or // same as remote URIs, i.e. we don't use the working tree or
// HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git
@ -170,29 +213,90 @@ struct GitInputScheme : InputScheme
static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing
auto url = parseURL(getStrAttr(input.attrs, "url")); auto url = parseURL(getStrAttr(input.attrs, "url"));
bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git"); bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git");
bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; repoInfo.isLocal = url.scheme == "file" && !forceHttp && !isBareRepository;
return {isLocal, isLocal ? url.path : url.base}; repoInfo.url = repoInfo.isLocal ? url.path : url.base;
// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
if (!input.getRef() && !input.getRev() && repoInfo.isLocal) {
repoInfo.isDirty = true;
auto env = getEnv();
/* Set LC_ALL to C: because we rely on the error messages
from git rev-parse to determine what went wrong that
way unknown errors can lead to a failure instead of
continuing through the wrong code path. */
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like
a commit, since that is the ref we want to use later
on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", repoInfo.url, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", repoInfo.url);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", repoInfo.url, exitCode, errorMessage);
}
repoInfo.hasHead = exitCode == 0;
try {
if (repoInfo.hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because it's conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", repoInfo.url, "diff", "HEAD", "--quiet"});
if (!repoInfo.submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
repoInfo.isDirty = false;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
}
return repoInfo;
}
std::set<std::string> listFiles(const RepoInfo & repoInfo)
{
auto gitOpts = Strings({ "-C", repoInfo.url, "ls-files", "-z" });
if (repoInfo.submodules)
gitOpts.emplace_back("--recurse-submodules");
return tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
} }
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{ {
Input input(_input); Input input(_input);
auto repoInfo = getRepoInfo(input);
std::string name = input.getName(); std::string name = input.getName();
bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false);
bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
bool allRefs = maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
std::string cacheType = "git";
if (shallow) cacheType += "-shallow";
if (submodules) cacheType += "-submodules";
if (allRefs) cacheType += "-all-refs";
auto getLockedAttrs = [&]() auto getLockedAttrs = [&]()
{ {
return Attrs({ return Attrs({
{"type", cacheType}, {"type", repoInfo.cacheType},
{"name", name}, {"name", name},
{"rev", input.getRev()->gitRev()}, {"rev", input.getRev()->gitRev()},
}); });
@ -203,7 +307,7 @@ struct GitInputScheme : InputScheme
{ {
assert(input.getRev()); assert(input.getRev());
assert(!_input.getRev() || _input.getRev() == input.getRev()); assert(!_input.getRev() || _input.getRev() == input.getRev());
if (!shallow) if (!repoInfo.shallow)
input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount"));
input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified"));
return {std::move(storePath), input}; return {std::move(storePath), input};
@ -214,80 +318,15 @@ struct GitInputScheme : InputScheme
return makeResult(res->first, std::move(res->second)); return makeResult(res->first, std::move(res->second));
} }
auto [isLocal, actualUrl_] = getActualUrl(input); if (repoInfo.isDirty) {
auto actualUrl = actualUrl_; // work around clang bug
// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
if (!input.getRef() && !input.getRev() && isLocal) {
bool clean = false;
auto env = getEnv();
// Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
// that way unknown errors can lead to a failure instead of continuing through the wrong code path
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like a commit,
since that is the refrence we want to use later on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", actualUrl, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", actualUrl);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage);
}
bool hasHead = exitCode == 0;
try {
if (hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because its conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", actualUrl, "diff", "HEAD", "--quiet"});
if (!submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
clean = true;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
if (!clean) {
/* This is an unclean working tree. So copy all tracked files. */ /* This is an unclean working tree. So copy all tracked files. */
repoInfo.checkDirty();
if (!fetchSettings.allowDirty) auto files = listFiles(repoInfo);
throw Error("Git tree '%s' is dirty", actualUrl);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", actualUrl);
auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");
auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
PathFilter filter = [&](const Path & p) -> bool { PathFilter filter = [&](const Path & p) -> bool {
assert(hasPrefix(p, actualUrl)); assert(hasPrefix(p, repoInfo.url));
std::string file(p, actualUrl.size() + 1); std::string file(p, repoInfo.url.size() + 1);
auto st = lstat(p); auto st = lstat(p);
@ -300,36 +339,36 @@ struct GitInputScheme : InputScheme
return files.count(file); return files.count(file);
}; };
auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); auto storePath = store->addToStore(input.getName(), repoInfo.url, FileIngestionMethod::Recursive, htSHA256, filter);
// FIXME: maybe we should use the timestamp of the last // FIXME: maybe we should use the timestamp of the last
// modified dirty file? // modified dirty file?
input.attrs.insert_or_assign( input.attrs.insert_or_assign(
"lastModified", "lastModified",
hasHead ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); repoInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", repoInfo.url, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
return {std::move(storePath), input}; return {std::move(storePath), input};
} }
}
if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); // FIXME: move to getRepoInfo().
if (!input.getRef()) input.attrs.insert_or_assign("ref", repoInfo.isLocal ? readHead(repoInfo.url) : "master");
Attrs unlockedAttrs({ Attrs unlockedAttrs({
{"type", cacheType}, {"type", repoInfo.cacheType},
{"name", name}, {"name", name},
{"url", actualUrl}, {"url", repoInfo.url},
{"ref", *input.getRef()}, {"ref", *input.getRef()},
}); });
Path repoDir; Path repoDir;
if (isLocal) { if (repoInfo.isLocal) {
if (!input.getRev()) if (!input.getRev())
input.attrs.insert_or_assign("rev", input.attrs.insert_or_assign("rev",
Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input.getRef() })), htSHA1).gitRev()); Hash::parseAny(chomp(runProgram("git", true, { "-C", repoInfo.url, "rev-parse", *input.getRef() })), htSHA1).gitRev());
repoDir = actualUrl; repoDir = repoInfo.url;
} else { } else {
@ -341,7 +380,7 @@ struct GitInputScheme : InputScheme
} }
} }
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, repoInfo.url).to_string(Base32, false);
repoDir = cacheDir; repoDir = cacheDir;
createDirs(dirOf(cacheDir)); createDirs(dirOf(cacheDir));
@ -373,7 +412,7 @@ struct GitInputScheme : InputScheme
} }
} }
} else { } else {
if (allRefs) { if (repoInfo.allRefs) {
doFetch = true; doFetch = true;
} else { } else {
/* If the local ref is older than tarball-ttl seconds, do a /* If the local ref is older than tarball-ttl seconds, do a
@ -385,23 +424,23 @@ struct GitInputScheme : InputScheme
} }
if (doFetch) { if (doFetch) {
Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", repoInfo.url));
// FIXME: git stderr messes up our progress indicator, so // FIXME: git stderr messes up our progress indicator, so
// we're using --quiet for now. Should process its stderr. // we're using --quiet for now. Should process its stderr.
try { try {
auto ref = input.getRef(); auto ref = input.getRef();
auto fetchRef = allRefs auto fetchRef = repoInfo.allRefs
? "refs/*" ? "refs/*"
: ref->compare(0, 5, "refs/") == 0 : ref->compare(0, 5, "refs/") == 0
? *ref ? *ref
: ref == "HEAD" : ref == "HEAD"
? *ref ? *ref
: "refs/heads/" + *ref; : "refs/heads/" + *ref;
runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", repoInfo.url, fmt("%s:%s", fetchRef, fetchRef) });
} catch (Error & e) { } catch (Error & e) {
if (!pathExists(localRefFile)) throw; if (!pathExists(localRefFile)) throw;
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); warn("could not update local clone of Git repository '%s'; continuing with the most recent version", repoInfo.url);
} }
struct timeval times[2]; struct timeval times[2];
@ -421,12 +460,12 @@ struct GitInputScheme : InputScheme
bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true";
if (isShallow && !shallow) if (isShallow && !repoInfo.shallow)
throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", repoInfo.url);
// FIXME: check whether rev is an ancestor of ref. // FIXME: check whether rev is an ancestor of ref.
printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), actualUrl); printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), repoInfo.url);
/* Now that we know the ref, check again whether we have it in /* Now that we know the ref, check again whether we have it in
the store. */ the store. */
@ -452,11 +491,11 @@ struct GitInputScheme : InputScheme
"allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".",
input.getRev()->gitRev(), input.getRev()->gitRev(),
*input.getRef(), *input.getRef(),
actualUrl repoInfo.url
); );
} }
if (submodules) { if (repoInfo.submodules) {
Path tmpGitDir = createTempDir(); Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true); AutoDelete delTmpGitDir(tmpGitDir, true);
@ -468,7 +507,7 @@ struct GitInputScheme : InputScheme
"--update-head-ok", "--", repoDir, "refs/*:refs/*" }); "--update-head-ok", "--", repoDir, "refs/*:refs/*" });
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() }); runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() });
runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", repoInfo.url });
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });
filter = isNotDotGitDirectory; filter = isNotDotGitDirectory;
@ -495,7 +534,7 @@ struct GitInputScheme : InputScheme
{"lastModified", lastModified}, {"lastModified", lastModified},
}); });
if (!shallow) if (!repoInfo.shallow)
infoAttrs.insert_or_assign("revCount", infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() }))); std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() })));
@ -516,6 +555,23 @@ struct GitInputScheme : InputScheme
return makeResult(infoAttrs, std::move(storePath)); return makeResult(infoAttrs, std::move(storePath));
} }
std::pair<ref<InputAccessor>, Input> lazyFetch(ref<Store> store, const Input & input) override
{
auto repoInfo = getRepoInfo(input);
/* Unless we're using the working tree, copy the tree into the
Nix store. TODO: We could have an accessor for fetching
files from the Git repository directly. */
if (input.getRef() || input.getRev() || !repoInfo.isLocal)
return InputScheme::lazyFetch(store, input);
repoInfo.checkDirty();
// FIXME: return updated input.
return {makeFSInputAccessor(repoInfo.url, listFiles(repoInfo)), input};
}
}; };
static auto rGitInputScheme = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); }); static auto rGitInputScheme = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); });