git fetcher: Add exportIgnore parameter

Enabled by default for fetchGit, which historically had this behavior
(among other behaviors that we do not want in fetchGit).

fetchTree disables this parameter by default. It can choose the
simpler behavior, as it is still experimental.

I am not confident that the filtering implementation is future-proof.
It should reuse a source filtering wrapper, which I believe
Eelco has already written but has not merged yet.
This commit is contained in:
Robert Hensing 2023-11-27 22:34:41 +01:00
parent 4d0ecda33e
commit ce6d58a97c
5 changed files with 81 additions and 14 deletions

View file

@ -39,6 +39,10 @@ void emitTreeAttrs(
attrs.alloc("submodules").mkBool(
fetchers::maybeGetBoolAttr(input.attrs, "submodules").value_or(false));
if (input.getType() == "git")
attrs.alloc("exportIgnore").mkBool(
fetchers::maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false));
if (!forceDirty) {
if (auto rev = input.getRev()) {
@ -112,6 +116,11 @@ static void fetchTree(
attrs.emplace("type", type.value());
if (params.isFetchGit) {
// Default value; user attrs are assigned later.
attrs.emplace("exportIgnore", Explicit<bool>{true});
}
for (auto & attr : *args[0]->attrs) {
if (attr.name == state.sType) continue;
state.forceValue(*attr.value, attr.pos);
@ -593,6 +602,11 @@ static RegisterPrimOp primop_fetchGit({
A Boolean parameter that specifies whether submodules should be checked out.
- `exportIgnore` (default: `true`)
A Boolean parameter that specifies whether `export-ignore` from `.gitattributes` should be applied.
This approximates part of the `git archive` behavior.
- `shallow` (default: `false`)
A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed.

View file

@ -7,6 +7,7 @@
#include <boost/core/span.hpp>
#include <git2/attr.h>
#include <git2/blob.h>
#include <git2/commit.h>
#include <git2/config.h>
@ -21,6 +22,7 @@
#include <git2/submodule.h>
#include <git2/tree.h>
#include <iostream>
#include <unordered_set>
#include <queue>
#include <regex>
@ -307,7 +309,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return std::nullopt;
}
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override;
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
std::string resolveSubmoduleUrl(
const std::string & url,
@ -340,7 +342,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return true;
}
ref<InputAccessor> getAccessor(const Hash & rev) override;
ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) override;
static int sidebandProgressCallback(const char * str, int len, void * payload)
{
@ -460,10 +462,12 @@ struct GitInputAccessor : InputAccessor
{
ref<GitRepoImpl> repo;
Tree root;
bool exportIgnore;
GitInputAccessor(ref<GitRepoImpl> repo_, const Hash & rev)
GitInputAccessor(ref<GitRepoImpl> repo_, const Hash & rev, bool exportIgnore)
: repo(repo_)
, root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE))
, exportIgnore(exportIgnore)
{
}
@ -492,7 +496,7 @@ struct GitInputAccessor : InputAccessor
return Stat { .type = tDirectory };
auto entry = lookup(path);
if (!entry)
if (!entry || isExportIgnored(path))
return std::nullopt;
auto mode = git_tree_entry_filemode(entry);
@ -527,6 +531,12 @@ struct GitInputAccessor : InputAccessor
for (size_t n = 0; n < count; ++n) {
auto entry = git_tree_entry_byindex(tree.get(), n);
if (exportIgnore) {
if (isExportIgnored(path + git_tree_entry_name(entry))) {
continue;
}
}
// FIXME: add to cache
res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{});
}
@ -556,6 +566,33 @@ struct GitInputAccessor : InputAccessor
std::unordered_map<CanonPath, TreeEntry> lookupCache;
/**
 * Report whether `path` has the `export-ignore` git attribute set.
 *
 * Always returns false when this accessor was created with
 * `exportIgnore == false`. Otherwise the attribute is looked up through
 * libgit2 for the path relative to the repository root.
 *
 * NOTE(review): the lookup passes GIT_ATTR_CHECK_INDEX_ONLY and no commit
 * id, so attributes appear to be resolved via the repository/index rather
 * than from the tree this accessor was opened on. Confirm that is intended.
 *
 * @throws Error if libgit2 reports a failure other than "not found".
 */
bool isExportIgnored(const CanonPath & path) {
    if (!exportIgnore)
        return false;

    const char * exportIgnoreEntry = nullptr;

    // Keep the relative path alive in a named local for the duration of
    // the libgit2 call.
    const std::string relPath(path.rel());

    // GIT_ATTR_CHECK_INDEX_ONLY:
    // > It will use index only for creating archives or for a bare repo
    // > (if an index has been specified for the bare repo).
    // -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48
    const int rc = git_attr_get(
        &exportIgnoreEntry,
        *repo,
        GIT_ATTR_CHECK_INDEX_ONLY,
        relPath.c_str(),
        "export-ignore");

    // GIT_ENOTFOUND is a libgit2 *return code*; it must be compared with the
    // function result, not with git_error_last()->klass, which holds an
    // error *class* (GIT_ERROR_*) and can never equal GIT_ENOTFOUND.
    if (rc == GIT_ENOTFOUND)
        return false;

    if (rc != 0) {
        // git_error_last() may be null if no error details were recorded.
        const git_error * lastError = git_error_last();
        throw Error("looking up '%s': %s", showPath(path),
            lastError ? lastError->message : "unknown error");
    }

    // Official git will silently reject export-ignore lines that have
    // values. We do the same.
    return GIT_ATTR_IS_TRUE(exportIgnoreEntry);
}
/* Recursively look up 'path' relative to the root. */
git_tree_entry * lookup(const CanonPath & path)
{
@ -569,6 +606,10 @@ struct GitInputAccessor : InputAccessor
throw Error("looking up '%s': %s", showPath(path), git_error_last()->message);
}
if (entry && isExportIgnored(path)) {
entry.reset();
}
i = lookupCache.emplace(path, std::move(entry)).first;
}
@ -644,17 +685,17 @@ struct GitInputAccessor : InputAccessor
}
};
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev)
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore)
{
return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev);
return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev, exportIgnore);
}
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev)
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
{
/* Read the .gitmodules files from this revision. */
CanonPath modulesFile(".gitmodules");
auto accessor = getAccessor(rev);
auto accessor = getAccessor(rev, exportIgnore);
if (!accessor->pathExists(modulesFile)) return {};
/* Parse it and get the revision of each submodule. */

View file

@ -57,7 +57,7 @@ struct GitRepo
* Return the submodules of this repo at the indicated revision,
* along with the revision of each submodule.
*/
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) = 0;
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0;
virtual std::string resolveSubmoduleUrl(
const std::string & url,
@ -71,7 +71,7 @@ struct GitRepo
virtual bool hasObject(const Hash & oid) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
virtual void fetch(
const std::string & url,

View file

@ -174,7 +174,7 @@ struct GitInputScheme : InputScheme
for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "allRefs" || name == "verifyCommit")
else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
attrs.emplace(name, Explicit<bool> { value == "1" });
else
url2.query.emplace(name, value);
@ -199,6 +199,7 @@ struct GitInputScheme : InputScheme
"rev",
"shallow",
"submodules",
"exportIgnore",
"lastModified",
"revCount",
"narHash",
@ -250,6 +251,8 @@ struct GitInputScheme : InputScheme
url.query.insert_or_assign("shallow", "1");
if (getSubmodulesAttr(input))
url.query.insert_or_assign("submodules", "1");
if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false))
url.query.insert_or_assign("exportIgnore", "1");
if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false))
url.query.insert_or_assign("verifyCommit", "1");
auto publicKeys = getPublicKeys(input.attrs);
@ -372,6 +375,11 @@ struct GitInputScheme : InputScheme
return maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
}
/* Read the boolean `exportIgnore` attribute of `input`; an absent
   attribute is treated as false. */
bool getExportIgnoreAttr(const Input & input) const
{
    const auto exportIgnoreAttr = maybeGetBoolAttr(input.attrs, "exportIgnore");
    return exportIgnoreAttr.value_or(false);
}
bool getAllRefsAttr(const Input & input) const
{
return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
@ -600,7 +608,8 @@ struct GitInputScheme : InputScheme
verifyCommit(input, repo);
auto accessor = repo->getAccessor(rev);
bool exportIgnore = getExportIgnoreAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore);
accessor->setPathDisplay("«" + input.to_string() + "»");
@ -610,7 +619,7 @@ struct GitInputScheme : InputScheme
if (getSubmodulesAttr(input)) {
std::map<CanonPath, nix::ref<InputAccessor>> mounts;
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) {
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) {
auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url);
debug("Git submodule %s: %s %s %s -> %s",
submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved);

View file

@ -231,12 +231,15 @@ unset _NIX_FORCE_HTTP
# Ensure .gitattributes is respected
touch $repo/not-exported-file
touch $repo/exported-wonky
echo "/not-exported-file export-ignore" >> $repo/.gitattributes
git -C $repo add not-exported-file .gitattributes
echo "/exported-wonky export-ignore=wonk" >> $repo/.gitattributes
git -C $repo add not-exported-file exported-wonky .gitattributes
git -C $repo commit -m 'Bla6'
rev5=$(git -C $repo rev-parse HEAD)
path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath")
[[ ! -e $path12/not-exported-file ]]
[[ -e $path12/exported-wonky ]]
# should fail if there is no repo
rm -rf $repo/.git