Merge pull request #10028 from DavHau/fetchTree-shallow-default

fetchTree: shallow git fetching by default
This commit is contained in:
Eelco Dolstra 2024-06-03 16:02:34 +02:00 committed by GitHub
commit ac3e5d22e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 78 additions and 6 deletions

View file

@ -0,0 +1,12 @@
---
synopsis: "`fetchTree` now fetches git repositories shallowly by default"
prs: 10028
---
`builtins.fetchTree` now clones git repositories shallowly by default, which reduces network traffic and disk usage significantly in many cases.
Previously, the default behavior was to clone the full history of a specific tag or branch (i.e. the `ref`) and only afterwards extract the files of one specific revision.
From now on, the `ref` and `allRefs` arguments are ignored unless shallow cloning is disabled by setting `shallow = false`.
The defaults for `builtins.fetchGit` remain unchanged. Here, shallow cloning has to be enabled manually by passing `shallow = true`.

View file

@ -137,6 +137,11 @@ static void fetchTree(
attrs.emplace("exportIgnore", Explicit<bool>{true}); attrs.emplace("exportIgnore", Explicit<bool>{true});
} }
// fetchTree should fetch git repos with shallow = true by default
if (type == "git" && !params.isFetchGit && !attrs.contains("shallow")) {
attrs.emplace("shallow", Explicit<bool>{true});
}
if (!params.allowNameArgument) if (!params.allowNameArgument)
if (auto nameIter = attrs.find("name"); nameIter != attrs.end()) if (auto nameIter = attrs.find("name"); nameIter != attrs.end())
state.error<EvalError>( state.error<EvalError>(
@ -320,6 +325,8 @@ static RegisterPrimOp primop_fetchTree({
- `ref` (String, optional) - `ref` (String, optional)
By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled.
A [Git reference](https://git-scm.com/book/en/v2/Git-Internals-Git-References), such as a branch or tag name. A [Git reference](https://git-scm.com/book/en/v2/Git-Internals-Git-References), such as a branch or tag name.
Default: `"HEAD"` Default: `"HEAD"`
@ -333,8 +340,9 @@ static RegisterPrimOp primop_fetchTree({
- `shallow` (Bool, optional) - `shallow` (Bool, optional)
Make a shallow clone when fetching the Git tree. Make a shallow clone when fetching the Git tree.
When this is enabled, the options `ref` and `allRefs` have no effect anymore.
Default: `false` Default: `true`
- `submodules` (Bool, optional) - `submodules` (Bool, optional)
@ -344,8 +352,11 @@ static RegisterPrimOp primop_fetchTree({
- `allRefs` (Bool, optional) - `allRefs` (Bool, optional)
If set to `true`, always fetch the entire repository, even if the latest commit is still in the cache. By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled.
Otherwise, only the latest commit is fetched if it is not already cached.
Whether to fetch all references (e.g. branches and tags) of the repository.
With this argument being true, it's possible to load a `rev` from *any* `ref`.
(Without setting this option, only `rev`s from the specified `ref` are supported).
Default: `false` Default: `false`
@ -599,6 +610,8 @@ static RegisterPrimOp primop_fetchGit({
[Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References
This option has no effect once `shallow` cloning is enabled.
By default, the `ref` value is prefixed with `refs/heads/`. By default, the `ref` value is prefixed with `refs/heads/`.
As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`. As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`.
@ -616,13 +629,15 @@ static RegisterPrimOp primop_fetchGit({
- `shallow` (default: `false`) - `shallow` (default: `false`)
Make a shallow clone when fetching the Git tree. Make a shallow clone when fetching the Git tree.
When this is enabled, the options `ref` and `allRefs` have no effect anymore.
- `allRefs` - `allRefs`
Whether to fetch all references of the repository. Whether to fetch all references (e.g. branches and tags) of the repository.
With this argument being true, it's possible to load a `rev` from *any* `ref` With this argument being true, it's possible to load a `rev` from *any* `ref`.
(by default only `rev`s from the specified `ref` are supported). (by default only `rev`s from the specified `ref` are supported).
This option has no effect once `shallow` cloning is enabled.
- `verifyCommit` (default: `true` if `publicKey` or `publicKeys` are provided, otherwise `false`) - `verifyCommit` (default: `true` if `publicKey` or `publicKeys` are provided, otherwise `false`)
Whether to check `rev` for a signature matching `publicKey` or `publicKeys`. Whether to check `rev` for a signature matching `publicKey` or `publicKeys`.

View file

@ -0,0 +1,45 @@
{
  # Test case checking that `builtins.fetchTree` with `type = "git"` does not
  # download the full history of a repository when `shallow` is left unset.
  # NOTE(review): `client` and `repo` are not defined here; presumably they are
  # provided by the surrounding test harness — confirm against the caller.
  description = "fetchTree fetches git repos shallowly by default";
  script = ''
    # purge nix git cache to make sure we start with a clean slate
    client.succeed("rm -rf ~/.cache/nix")

    # add two commits to the repo:
    #  - one with a large file (2M of random data)
    #  - another one making the file small again
    # Only a fetch that includes the history of commit2 has to transfer the
    # large blob, so the cache size afterwards tells the two cases apart.
    client.succeed(f"""
      dd if=/dev/urandom of={repo.path}/thailand bs=1M count=2 \
      && {repo.git} add thailand \
      && {repo.git} commit -m 'commit1' \
      && echo 'ThaigerSprint' > {repo.path}/thailand \
      && {repo.git} add thailand \
      && {repo.git} commit -m 'commit2' \
      && {repo.git} push origin main
    """)

    # memoize the revision
    commit2_rev = client.succeed(f"""
      {repo.git} rev-parse HEAD
    """).strip()

    # construct the fetcher call; `shallow` is deliberately not passed so that
    # the default is what gets exercised
    fetchTree_expr = f"""
      builtins.fetchTree {{
        type = "git";
        url = "{repo.remote}";
        rev = "{commit2_rev}";
      }}
    """

    # fetch the repo via nix (only the side effect on the cache matters here)
    client.succeed(f"""
      nix eval --impure --raw --expr '({fetchTree_expr}).outPath'
    """)

    # check that the size of ~/.cache/nix is less than 1M
    # `-k` pins the unit to KiB so the comparison below does not depend on
    # du's environment-dependent default block size
    cache_size = client.succeed("""
      du -sk ~/.cache/nix
    """).strip().split()[0]
    assert int(cache_size) < 1024, f"cache size is {cache_size}K which is larger than 1M"
  '';
}