fetchTree: shallow git fetching by default

Motivation:
make git fetching more efficient for most repos by default
This commit is contained in:
DavHau 2024-02-17 19:36:32 +07:00
parent 60936f28e5
commit 358c26fd13
3 changed files with 78 additions and 6 deletions

View file

@ -0,0 +1,12 @@
---
synopsis: "`fetchTree` now fetches git repositories shallowly by default"
prs: 10028
---
`builtins.fetchTree` now clones git repositories shallowly by default, which reduces network traffic and disk usage significantly in many cases.
Previously, the default behavior was to clone the full history of a specific tag or branch (selected via `ref`) and only afterwards extract the files of one specific revision.
From now on, the `ref` and `allRefs` arguments are ignored unless shallow cloning is disabled by setting `shallow = false`.
The defaults for `builtins.fetchGit` remain unchanged: with `fetchGit`, shallow cloning still has to be enabled explicitly by passing `shallow = true`.

View file

@ -138,6 +138,11 @@ static void fetchTree(
attrs.emplace("exportIgnore", Explicit<bool>{true});
}
// fetchTree should fetch git repos with shallow = true by default
if (type == "git" && !params.isFetchGit && !attrs.contains("shallow")) {
attrs.emplace("shallow", Explicit<bool>{true});
}
if (!params.allowNameArgument)
if (auto nameIter = attrs.find("name"); nameIter != attrs.end())
state.error<EvalError>(
@ -321,6 +326,8 @@ static RegisterPrimOp primop_fetchTree({
- `ref` (String, optional)
By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled.
A [Git reference](https://git-scm.com/book/en/v2/Git-Internals-Git-References), such as a branch or tag name.
Default: `"HEAD"`
@ -334,8 +341,9 @@ static RegisterPrimOp primop_fetchTree({
- `shallow` (Bool, optional)
Make a shallow clone when fetching the Git tree.
When this is enabled, the options `ref` and `allRefs` have no effect anymore.
Default: `false`
Default: `true`
- `submodules` (Bool, optional)
@ -345,8 +353,11 @@ static RegisterPrimOp primop_fetchTree({
- `allRefs` (Bool, optional)
If set to `true`, always fetch the entire repository, even if the latest commit is still in the cache.
Otherwise, only the latest commit is fetched if it is not already cached.
By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled.
Whether to fetch all references (eg. branches and tags) of the repository.
With this argument being true, it's possible to load a `rev` from *any* `ref`.
(Without setting this option, only `rev`s from the specified `ref` are supported).
Default: `false`
@ -600,6 +611,8 @@ static RegisterPrimOp primop_fetchGit({
[Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References
This option has no effect once `shallow` cloning is enabled.
By default, the `ref` value is prefixed with `refs/heads/`.
As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`.
@ -617,13 +630,15 @@ static RegisterPrimOp primop_fetchGit({
- `shallow` (default: `false`)
Make a shallow clone when fetching the Git tree.
When this is enabled, the options `ref` and `allRefs` have no effect anymore.
- `allRefs`
Whether to fetch all references of the repository.
With this argument being true, it's possible to load a `rev` from *any* `ref`
Whether to fetch all references (eg. branches and tags) of the repository.
With this argument being true, it's possible to load a `rev` from *any* `ref`.
(by default only `rev`s from the specified `ref` are supported).
This option has no effect once `shallow` cloning is enabled.
- `verifyCommit` (default: `true` if `publicKey` or `publicKeys` are provided, otherwise `false`)
Whether to check `rev` for a signature matching `publicKey` or `publicKeys`.

View file

@ -0,0 +1,45 @@
# Test case: `builtins.fetchTree` must fetch git repositories shallowly by
# default, i.e. without downloading history that the requested `rev` does not
# need.
#
# `script` is Python. `client` and `repo` are not defined in this file;
# presumably they are injected by the surrounding fetch-git test harness.
{
  description = "fetchTree fetches git repos shallowly by default";
  script = ''
    # purge nix git cache to make sure we start with a clean slate
    client.succeed("rm -rf ~/.cache/nix")

    # add two commits to the repo:
    #  - one with a large file (2M of random data)
    #  - another one making the file small again
    # Only a fetch that also pulls in commit1 has to transfer the 2M blob,
    # so the cache size afterwards tells us whether history was fetched.
    client.succeed(f"""
      dd if=/dev/urandom of={repo.path}/thailand bs=1M count=2 \
      && {repo.git} add thailand \
      && {repo.git} commit -m 'commit1' \
      && echo 'ThaigerSprint' > {repo.path}/thailand \
      && {repo.git} add thailand \
      && {repo.git} commit -m 'commit2' \
      && {repo.git} push origin main
    """)

    # memoize the revision of the second (small) commit
    commit2_rev = client.succeed(f"""
      {repo.git} rev-parse HEAD
    """).strip()

    # construct the fetcher call; `shallow` is deliberately left unset so
    # that the default of fetchTree is what gets exercised
    fetchTree_expr = f"""
      builtins.fetchTree {{
        type = "git";
        url = "{repo.remote}";
        rev = "{commit2_rev}";
      }}
    """

    # fetch the repo via nix; only the side effect on ~/.cache/nix matters,
    # the resulting store path is not needed
    client.succeed(f"""
      nix eval --impure --raw --expr '({fetchTree_expr}).outPath'
    """)

    # check that the size of ~/.cache/nix is less than 1M
    # (-k pins the unit of the reported size to 1K blocks)
    cache_size = int(client.succeed("""
      du -sk ~/.cache/nix
    """).strip().split()[0])
    assert cache_size < 1024, f"cache size is {cache_size}K which is larger than 1M"
  '';
}