Merge pull request #9480 from NixOS/libfetchers-git-exportIgnore

libfetchers/git: Support export-ignore
This commit is contained in:
Robert Hensing 2024-01-16 23:03:46 +01:00 committed by GitHub
commit 2a3c5e6b8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 296 additions and 19 deletions

View file

@ -0,0 +1,18 @@
---
synopsis: "Nix now uses `libgit2` for Git fetching"
prs:
- 9240
- 9241
- 9258
- 9480
issues:
- 5313
---
Nix has built-in support for fetching sources from Git, during evaluation and locking; outside the sandbox.
The existing implementation based on the Git CLI had issues regarding reproducibility and performance.
Most of the original `fetchGit` behavior has been implemented using the `libgit2` library, which gives the fetcher fine-grained control.
Known issues:
- The `export-subst` behavior has not been reimplemented. [Partial](https://github.com/NixOS/nix/pull/9391#issuecomment-1872503447) support for this Git feature is feasible, but it did not make the release window.

View file

@ -1,3 +1,4 @@
#include "libfetchers/attrs.hh"
#include "primops.hh"
#include "eval-inline.hh"
#include "eval-settings.hh"
@ -25,7 +26,7 @@ void emitTreeAttrs(
{
assert(input.isLocked());
auto attrs = state.buildBindings(10);
auto attrs = state.buildBindings(100);
state.mkStorePathString(storePath, attrs.alloc(state.sOutPath));
@ -135,6 +136,10 @@ static void fetchTree(
state.symbols[attr.name], showType(*attr.value)));
}
if (params.isFetchGit && !attrs.contains("exportIgnore") && (!attrs.contains("submodules") || !*fetchers::maybeGetBoolAttr(attrs, "submodules"))) {
attrs.emplace("exportIgnore", Explicit<bool>{true});
}
if (!params.allowNameArgument)
if (auto nameIter = attrs.find("name"); nameIter != attrs.end())
state.debugThrowLastTrace(EvalError({
@ -152,6 +157,9 @@ static void fetchTree(
fetchers::Attrs attrs;
attrs.emplace("type", "git");
attrs.emplace("url", fixGitURL(url));
if (!attrs.contains("exportIgnore") && (!attrs.contains("submodules") || !*fetchers::maybeGetBoolAttr(attrs, "submodules"))) {
attrs.emplace("exportIgnore", Explicit<bool>{true});
}
input = fetchers::Input::fromAttrs(std::move(attrs));
} else {
if (!experimentalFeatureSettings.isEnabled(Xp::Flakes))
@ -593,6 +601,13 @@ static RegisterPrimOp primop_fetchGit({
A Boolean parameter that specifies whether submodules should be checked out.
- `exportIgnore` (default: `true`)
A Boolean parameter that specifies whether `export-ignore` from `.gitattributes` should be applied.
This approximates part of the `git archive` behavior.
Enabling this option is not recommended because it is unknown whether the Git developers commit to the reproducibility of `export-ignore` in newer Git versions.
- `shallow` (default: `false`)
A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed.

View file

@ -187,6 +187,13 @@ struct InputScheme
virtual bool isDirect(const Input & input) const
{ return true; }
/**
* A sufficiently unique string that can be used as a cache key to identify the `input`.
*
* Only known-equivalent inputs should return the same fingerprint.
*
* This is not a stable identifier between Nix versions, but not guaranteed to change either.
*/
virtual std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const
{ return std::nullopt; }
};

View file

@ -80,4 +80,13 @@ ref<AllowListInputAccessor> AllowListInputAccessor::create(
return make_ref<AllowListInputAccessorImpl>(next, std::move(allowedPaths), std::move(makeNotAllowedError));
}
bool CachingFilteringInputAccessor::isAllowed(const CanonPath & path)
{
auto i = cache.find(path);
if (i != cache.end()) return i->second;
auto res = isAllowedUncached(path);
cache.emplace(path, res);
return res;
}
}

View file

@ -6,7 +6,7 @@
namespace nix {
/**
* A function that should throw an exception of type
* A function that returns an exception of type
* `RestrictedPathError` explaining that access to `path` is
* forbidden.
*/
@ -71,4 +71,18 @@ struct AllowListInputAccessor : public FilteringInputAccessor
using FilteringInputAccessor::FilteringInputAccessor;
};
/**
* A wrapping `InputAccessor` mix-in where `isAllowed()` caches the result of virtual `isAllowedUncached()`.
*/
struct CachingFilteringInputAccessor : FilteringInputAccessor
{
std::map<CanonPath, bool> cache;
using FilteringInputAccessor::FilteringInputAccessor;
bool isAllowed(const CanonPath & path) override;
virtual bool isAllowedUncached(const CanonPath & path) = 0;
};
}

View file

@ -1,5 +1,7 @@
#include "git-utils.hh"
#include "fs-input-accessor.hh"
#include "input-accessor.hh"
#include "filtering-input-accessor.hh"
#include "cache.hh"
#include "finally.hh"
#include "processes.hh"
@ -7,6 +9,7 @@
#include <boost/core/span.hpp>
#include <git2/attr.h>
#include <git2/blob.h>
#include <git2/commit.h>
#include <git2/config.h>
@ -21,6 +24,7 @@
#include <git2/submodule.h>
#include <git2/tree.h>
#include <iostream>
#include <unordered_set>
#include <queue>
#include <regex>
@ -50,6 +54,8 @@ bool operator == (const git_oid & oid1, const git_oid & oid2)
namespace nix {
struct GitInputAccessor;
// Some wrapper types that ensure that the git_*_free functions get called.
template<auto del>
struct Deleter
@ -307,7 +313,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return std::nullopt;
}
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override;
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
std::string resolveSubmoduleUrl(
const std::string & url,
@ -340,7 +346,14 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return true;
}
ref<InputAccessor> getAccessor(const Hash & rev) override;
/**
* A 'GitInputAccessor' with no regard for export-ignore or any other transformations.
*/
ref<GitInputAccessor> getRawAccessor(const Hash & rev);
ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) override;
ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;
static int sidebandProgressCallback(const char * str, int len, void * payload)
{
@ -456,6 +469,9 @@ ref<GitRepo> GitRepo::openRepo(const CanonPath & path, bool create, bool bare)
return make_ref<GitRepoImpl>(path, create, bare);
}
/**
* Raw git tree input accessor.
*/
struct GitInputAccessor : InputAccessor
{
ref<GitRepoImpl> repo;
@ -644,17 +660,114 @@ struct GitInputAccessor : InputAccessor
}
};
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev)
struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor {
ref<GitRepoImpl> repo;
std::optional<Hash> rev;
GitExportIgnoreInputAccessor(ref<GitRepoImpl> repo, ref<InputAccessor> next, std::optional<Hash> rev)
: CachingFilteringInputAccessor(next, [&](const CanonPath & path) {
return RestrictedPathError(fmt("'%s' does not exist because it was fetched with exportIgnore enabled", path));
})
, repo(repo)
, rev(rev)
{ }
bool gitAttrGet(const CanonPath & path, const char * attrName, const char * & valueOut)
{
const char * pathCStr = path.rel_c_str();
if (rev) {
git_attr_options opts = GIT_ATTR_OPTIONS_INIT;
opts.attr_commit_id = hashToOID(*rev);
// TODO: test that gitattributes from global and system are not used
// (ie more or less: home and etc - both of them!)
opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM;
return git_attr_get_ext(
&valueOut,
*repo,
&opts,
pathCStr,
attrName
);
}
else {
return git_attr_get(
&valueOut,
*repo,
GIT_ATTR_CHECK_INDEX_ONLY | GIT_ATTR_CHECK_NO_SYSTEM,
pathCStr,
attrName);
}
}
bool isExportIgnored(const CanonPath & path)
{
const char *exportIgnoreEntry = nullptr;
// GIT_ATTR_CHECK_INDEX_ONLY:
// > It will use index only for creating archives or for a bare repo
// > (if an index has been specified for the bare repo).
// -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48
if (gitAttrGet(path, "export-ignore", exportIgnoreEntry)) {
if (git_error_last()->klass == GIT_ENOTFOUND)
return false;
else
throw Error("looking up '%s': %s", showPath(path), git_error_last()->message);
}
else {
// Official git will silently reject export-ignore lines that have
// values. We do the same.
return GIT_ATTR_IS_TRUE(exportIgnoreEntry);
}
}
bool isAllowedUncached(const CanonPath & path) override
{
return !isExportIgnored(path);
}
};
ref<GitInputAccessor> GitRepoImpl::getRawAccessor(const Hash & rev)
{
return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev);
auto self = ref<GitRepoImpl>(shared_from_this());
return make_ref<GitInputAccessor>(self, rev);
}
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev)
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore)
{
auto self = ref<GitRepoImpl>(shared_from_this());
ref<GitInputAccessor> rawGitAccessor = getRawAccessor(rev);
if (exportIgnore) {
return make_ref<GitExportIgnoreInputAccessor>(self, rawGitAccessor, rev);
}
else {
return rawGitAccessor;
}
}
ref<InputAccessor> GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError)
{
auto self = ref<GitRepoImpl>(shared_from_this());
ref<InputAccessor> fileAccessor =
AllowListInputAccessor::create(
makeFSInputAccessor(path),
std::set<CanonPath> { wd.files },
std::move(makeNotAllowedError));
if (exportIgnore) {
return make_ref<GitExportIgnoreInputAccessor>(self, fileAccessor, std::nullopt);
}
else {
return fileAccessor;
}
}
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
{
/* Read the .gitmodules files from this revision. */
CanonPath modulesFile(".gitmodules");
auto accessor = getAccessor(rev);
auto accessor = getAccessor(rev, exportIgnore);
if (!accessor->pathExists(modulesFile)) return {};
/* Parse it and get the revision of each submodule. */
@ -665,8 +778,10 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
std::vector<std::tuple<Submodule, Hash>> result;
auto rawAccessor = getRawAccessor(rev);
for (auto & submodule : parseSubmodules(CanonPath(pathTemp))) {
auto rev = accessor.dynamic_pointer_cast<GitInputAccessor>()->getSubmoduleRev(submodule.path);
auto rev = rawAccessor->getSubmoduleRev(submodule.path);
result.push_back({std::move(submodule), rev});
}

View file

@ -1,5 +1,6 @@
#pragma once
#include "filtering-input-accessor.hh"
#include "input-accessor.hh"
namespace nix {
@ -57,7 +58,7 @@ struct GitRepo
* Return the submodules of this repo at the indicated revision,
* along with the revision of each submodule.
*/
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) = 0;
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0;
virtual std::string resolveSubmoduleUrl(
const std::string & url,
@ -71,7 +72,9 @@ struct GitRepo
virtual bool hasObject(const Hash & oid) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
virtual ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0;
virtual void fetch(
const std::string & url,

View file

@ -1,3 +1,4 @@
#include "error.hh"
#include "fetchers.hh"
#include "users.hh"
#include "cache.hh"
@ -9,7 +10,6 @@
#include "processes.hh"
#include "git.hh"
#include "fs-input-accessor.hh"
#include "filtering-input-accessor.hh"
#include "mounted-input-accessor.hh"
#include "git-utils.hh"
#include "logging.hh"
@ -174,7 +174,7 @@ struct GitInputScheme : InputScheme
for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "allRefs" || name == "verifyCommit")
else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
attrs.emplace(name, Explicit<bool> { value == "1" });
else
url2.query.emplace(name, value);
@ -199,6 +199,7 @@ struct GitInputScheme : InputScheme
"rev",
"shallow",
"submodules",
"exportIgnore",
"lastModified",
"revCount",
"narHash",
@ -250,6 +251,8 @@ struct GitInputScheme : InputScheme
url.query.insert_or_assign("shallow", "1");
if (getSubmodulesAttr(input))
url.query.insert_or_assign("submodules", "1");
if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false))
url.query.insert_or_assign("exportIgnore", "1");
if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false))
url.query.insert_or_assign("verifyCommit", "1");
auto publicKeys = getPublicKeys(input.attrs);
@ -372,6 +375,11 @@ struct GitInputScheme : InputScheme
return maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
}
bool getExportIgnoreAttr(const Input & input) const
{
return maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false);
}
bool getAllRefsAttr(const Input & input) const
{
return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
@ -600,7 +608,8 @@ struct GitInputScheme : InputScheme
verifyCommit(input, repo);
auto accessor = repo->getAccessor(rev);
bool exportIgnore = getExportIgnoreAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore);
accessor->setPathDisplay("«" + input.to_string() + "»");
@ -610,7 +619,7 @@ struct GitInputScheme : InputScheme
if (getSubmodulesAttr(input)) {
std::map<CanonPath, nix::ref<InputAccessor>> mounts;
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) {
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) {
auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url);
debug("Git submodule %s: %s %s %s -> %s",
submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved);
@ -620,6 +629,7 @@ struct GitInputScheme : InputScheme
if (submodule.branch != "")
attrs.insert_or_assign("ref", submodule.branch);
attrs.insert_or_assign("rev", submoduleRev.gitRev());
attrs.insert_or_assign("exportIgnore", Explicit<bool>{ exportIgnore });
auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs));
auto [submoduleAccessor, submoduleInput2] =
submoduleInput.getAccessor(store);
@ -650,10 +660,13 @@ struct GitInputScheme : InputScheme
for (auto & submodule : repoInfo.workdirInfo.submodules)
repoInfo.workdirInfo.files.insert(submodule.path);
auto repo = GitRepo::openRepo(CanonPath(repoInfo.url), false, false);
auto exportIgnore = getExportIgnoreAttr(input);
ref<InputAccessor> accessor =
AllowListInputAccessor::create(
makeFSInputAccessor(CanonPath(repoInfo.url)),
std::move(repoInfo.workdirInfo.files),
repo->getAccessor(repoInfo.workdirInfo,
exportIgnore,
makeNotAllowedError(repoInfo.url));
/* If the repo has submodules, return a mounted input accessor
@ -667,6 +680,8 @@ struct GitInputScheme : InputScheme
fetchers::Attrs attrs;
attrs.insert_or_assign("type", "git");
attrs.insert_or_assign("url", submodulePath.abs());
attrs.insert_or_assign("exportIgnore", Explicit<bool>{ exportIgnore });
auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs));
auto [submoduleAccessor, submoduleInput2] =
submoduleInput.getAccessor(store);
@ -725,6 +740,16 @@ struct GitInputScheme : InputScheme
auto repoInfo = getRepoInfo(input);
if (getExportIgnoreAttr(input)
&& getSubmodulesAttr(input)) {
/* In this situation, we don't have a git CLI behavior that we can copy.
`git archive` does not support submodules, so it is unclear whether
rules from the parent should affect the submodule or not.
When git may eventually implement this, we need Nix to match its
behavior. */
throw UnimplementedError("exportIgnore and submodules are not supported together yet");
}
auto [accessor, final] =
input.getRef() || input.getRev() || !repoInfo.isLocal
? getAccessorFromCommit(store, repoInfo, std::move(input))
@ -738,7 +763,7 @@ struct GitInputScheme : InputScheme
std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
if (auto rev = input.getRev())
return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : "");
return rev->gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : "");
else
return std::nullopt;
}

View file

@ -88,6 +88,13 @@ public:
std::string_view rel() const
{ return ((std::string_view) path).substr(1); }
const char * rel_c_str() const
{
auto cs = path.c_str();
assert(cs[0]); // for safety if invariant is broken
return &cs[1];
}
struct Iterator
{
std::string_view remaining;

View file

@ -229,6 +229,18 @@ rev_tag2=$(git -C $repo rev-parse refs/tags/tag2)
[[ $rev_tag2_nix = $rev_tag2 ]]
unset _NIX_FORCE_HTTP
# Ensure .gitattributes is respected
touch $repo/not-exported-file
touch $repo/exported-wonky
echo "/not-exported-file export-ignore" >> $repo/.gitattributes
echo "/exported-wonky export-ignore=wonk" >> $repo/.gitattributes
git -C $repo add not-exported-file exported-wonky .gitattributes
git -C $repo commit -m 'Bla6'
rev5=$(git -C $repo rev-parse HEAD)
path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath")
[[ ! -e $path12/not-exported-file ]]
[[ -e $path12/exported-wonky ]]
# should fail if there is no repo
rm -rf $repo/.git
(! nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath")

View file

@ -118,3 +118,55 @@ cloneRepo=$TEST_ROOT/a/b/gitSubmodulesClone # NB /a/b to make the relative path
git clone $rootRepo $cloneRepo
pathIndirect=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath")
[[ $pathIndirect = $pathWithRelative ]]
# Test submodule export-ignore interaction
git -C $rootRepo/sub config user.email "foobar@example.com"
git -C $rootRepo/sub config user.name "Foobar"
echo "/exclude-from-root export-ignore" >> $rootRepo/.gitattributes
# TBD possible semantics for submodules + exportIgnore
# echo "/sub/exclude-deep export-ignore" >> $rootRepo/.gitattributes
echo nope > $rootRepo/exclude-from-root
git -C $rootRepo add .gitattributes exclude-from-root
git -C $rootRepo commit -m "Add export-ignore"
echo "/exclude-from-sub export-ignore" >> $rootRepo/sub/.gitattributes
echo nope > $rootRepo/sub/exclude-from-sub
# TBD possible semantics for submodules + exportIgnore
# echo aye > $rootRepo/sub/exclude-from-root
git -C $rootRepo/sub add .gitattributes exclude-from-sub
git -C $rootRepo/sub commit -m "Add export-ignore (sub)"
git -C $rootRepo add sub
git -C $rootRepo commit -m "Update submodule"
git -C $rootRepo status
# # TBD: not supported yet, because semantics are undecided and current implementation leaks rules from the root to submodules
# # exportIgnore can be used with submodules
# pathWithExportIgnore=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; exportIgnore = true; }).outPath")
# # find $pathWithExportIgnore
# # git -C $rootRepo archive --format=tar HEAD | tar -t
# # cp -a $rootRepo /tmp/rootRepo
# [[ -e $pathWithExportIgnore/sub/content ]]
# [[ ! -e $pathWithExportIgnore/exclude-from-root ]]
# [[ ! -e $pathWithExportIgnore/sub/exclude-from-sub ]]
# TBD possible semantics for submodules + exportIgnore
# # root .gitattribute has no power across submodule boundary
# [[ -e $pathWithExportIgnore/sub/exclude-from-root ]]
# [[ -e $pathWithExportIgnore/sub/exclude-deep ]]
# exportIgnore can be explicitly disabled with submodules
pathWithoutExportIgnore=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; exportIgnore = false; }).outPath")
# find $pathWithoutExportIgnore
[[ -e $pathWithoutExportIgnore/exclude-from-root ]]
[[ -e $pathWithoutExportIgnore/sub/exclude-from-sub ]]
# exportIgnore defaults to false when submodules = true
pathWithSubmodules=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$rootRepo; submodules = true; }).outPath")
[[ -e $pathWithoutExportIgnore/exclude-from-root ]]
[[ -e $pathWithoutExportIgnore/sub/exclude-from-sub ]]