mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2024-11-25 07:16:17 +02:00
Merge pull request #11195 from DeterminateSystems/tarball-roots
Improve handling of tarballs that don't consist of a single top-level directory
This commit is contained in:
commit
0b96c586e0
8 changed files with 107 additions and 32 deletions
9
doc/manual/rl-next/tarball-fixes.md
Normal file
9
doc/manual/rl-next/tarball-fixes.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
synopsis: "Improve handling of tarballs that don't consist of a single top-level directory"
|
||||
prs:
|
||||
- 11195
|
||||
---
|
||||
|
||||
In previous Nix releases, the tarball fetcher (used by `builtins.fetchTarball`) erroneously merged top-level directories into a single directory, and silently discarded top-level files that are not directories. This is no longer the case. The new behaviour is that the top-level path component of the files in the tarball is removed (similar to `tar`'s `--strip-components=1`) *only* if the tarball consists of a single top-level directory; otherwise the contents of the tarball are kept as they are.
|
||||
|
||||
Author: [**Eelco Dolstra (@edolstra)**](https://github.com/edolstra)
|
|
@ -559,11 +559,11 @@ static RegisterPrimOp primop_fetchTarball({
|
|||
.doc = R"(
|
||||
Download the specified URL, unpack it and return the path of the
|
||||
unpacked tree. The file must be a tape archive (`.tar`) compressed
|
||||
with `gzip`, `bzip2` or `xz`. The top-level path component of the
|
||||
files in the tarball is removed, so it is best if the tarball
|
||||
contains a single directory at top level. The typical use of the
|
||||
function is to obtain external Nix expression dependencies, such as
|
||||
a particular version of Nixpkgs, e.g.
|
||||
with `gzip`, `bzip2` or `xz`. If the tarball consists of a
|
||||
single directory, then the top-level path component of the files
|
||||
in the tarball is removed. The typical use of the function is to
|
||||
obtain external Nix expression dependencies, such as a
|
||||
particular version of Nixpkgs, e.g.
|
||||
|
||||
```nix
|
||||
with import (fetchTarball https://github.com/NixOS/nixpkgs/archive/nixos-14.12.tar.gz) {};
|
||||
|
|
|
@ -126,16 +126,39 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
T peelObject(git_repository * repo, git_object * obj, git_object_t type)
|
||||
T peelObject(git_object * obj, git_object_t type)
|
||||
{
|
||||
T obj2;
|
||||
if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) {
|
||||
auto err = git_error_last();
|
||||
throw Error("peeling Git object '%s': %s", git_object_id(obj), err->message);
|
||||
throw Error("peeling Git object '%s': %s", *git_object_id(obj), err->message);
|
||||
}
|
||||
return obj2;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T dupObject(typename T::pointer obj)
|
||||
{
|
||||
T obj2;
|
||||
if (git_object_dup((git_object * *) (typename T::pointer *) Setter(obj2), (git_object *) obj))
|
||||
throw Error("duplicating object '%s': %s", *git_object_id((git_object *) obj), git_error_last()->message);
|
||||
return obj2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Peel the specified object (i.e. follow tag and commit objects) to
|
||||
* either a blob or a tree.
|
||||
*/
|
||||
static Object peelToTreeOrBlob(git_object * obj)
|
||||
{
|
||||
/* git_object_peel() doesn't handle blob objects, so handle those
|
||||
specially. */
|
||||
if (git_object_type(obj) == GIT_OBJECT_BLOB)
|
||||
return dupObject<Object>(obj);
|
||||
else
|
||||
return peelObject<Object>(obj, GIT_OBJECT_TREE);
|
||||
}
|
||||
|
||||
struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
||||
{
|
||||
/** Location of the repository on disk. */
|
||||
|
@ -166,7 +189,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
std::unordered_set<git_oid> done;
|
||||
std::queue<Commit> todo;
|
||||
|
||||
todo.push(peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
|
||||
todo.push(peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
|
||||
|
||||
while (auto commit = pop(todo)) {
|
||||
if (!done.insert(*git_commit_id(commit->get())).second) continue;
|
||||
|
@ -184,7 +207,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
|
||||
uint64_t getLastModified(const Hash & rev) override
|
||||
{
|
||||
auto commit = peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
|
||||
auto commit = peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
|
||||
|
||||
return git_commit_time(commit.get());
|
||||
}
|
||||
|
@ -463,6 +486,23 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
|
||||
return narHash;
|
||||
}
|
||||
|
||||
Hash dereferenceSingletonDirectory(const Hash & oid_) override
|
||||
{
|
||||
auto oid = hashToOID(oid_);
|
||||
|
||||
auto _tree = lookupObject(*this, oid, GIT_OBJECT_TREE);
|
||||
auto tree = (const git_tree *) &*_tree;
|
||||
|
||||
if (git_tree_entrycount(tree) == 1) {
|
||||
auto entry = git_tree_entry_byindex(tree, 0);
|
||||
auto mode = git_tree_entry_filemode(entry);
|
||||
if (mode == GIT_FILEMODE_TREE)
|
||||
oid = *git_tree_entry_id(entry);
|
||||
}
|
||||
|
||||
return toHash(oid);
|
||||
}
|
||||
};
|
||||
|
||||
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
|
||||
|
@ -476,11 +516,11 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
|
|||
struct GitSourceAccessor : SourceAccessor
|
||||
{
|
||||
ref<GitRepoImpl> repo;
|
||||
Tree root;
|
||||
Object root;
|
||||
|
||||
GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev)
|
||||
: repo(repo_)
|
||||
, root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE))
|
||||
, root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -506,7 +546,7 @@ struct GitSourceAccessor : SourceAccessor
|
|||
std::optional<Stat> maybeLstat(const CanonPath & path) override
|
||||
{
|
||||
if (path.isRoot())
|
||||
return Stat { .type = tDirectory };
|
||||
return Stat { .type = git_object_type(root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular };
|
||||
|
||||
auto entry = lookup(path);
|
||||
if (!entry)
|
||||
|
@ -616,10 +656,10 @@ struct GitSourceAccessor : SourceAccessor
|
|||
std::optional<Tree> lookupTree(const CanonPath & path)
|
||||
{
|
||||
if (path.isRoot()) {
|
||||
Tree tree;
|
||||
if (git_tree_dup(Setter(tree), root.get()))
|
||||
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
|
||||
return tree;
|
||||
if (git_object_type(root.get()) == GIT_OBJECT_TREE)
|
||||
return dupObject<Tree>((git_tree *) &*root);
|
||||
else
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto entry = lookup(path);
|
||||
|
@ -646,10 +686,10 @@ struct GitSourceAccessor : SourceAccessor
|
|||
std::variant<Tree, Submodule> getTree(const CanonPath & path)
|
||||
{
|
||||
if (path.isRoot()) {
|
||||
Tree tree;
|
||||
if (git_tree_dup(Setter(tree), root.get()))
|
||||
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
|
||||
return tree;
|
||||
if (git_object_type(root.get()) == GIT_OBJECT_TREE)
|
||||
return dupObject<Tree>((git_tree *) &*root);
|
||||
else
|
||||
throw Error("Git root object '%s' is not a directory", *git_object_id(root.get()));
|
||||
}
|
||||
|
||||
auto entry = need(path);
|
||||
|
@ -669,6 +709,9 @@ struct GitSourceAccessor : SourceAccessor
|
|||
|
||||
Blob getBlob(const CanonPath & path, bool expectSymlink)
|
||||
{
|
||||
if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB)
|
||||
return dupObject<Blob>((git_blob *) &*root);
|
||||
|
||||
auto notExpected = [&]()
|
||||
{
|
||||
throw Error(
|
||||
|
@ -782,8 +825,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
size_t componentsToStrip = 1;
|
||||
|
||||
void pushBuilder(std::string name)
|
||||
{
|
||||
git_treebuilder * b;
|
||||
|
@ -839,9 +880,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
{
|
||||
std::span<const std::string> pathComponents2{pathComponents};
|
||||
|
||||
if (pathComponents2.size() <= componentsToStrip) return false;
|
||||
pathComponents2 = pathComponents2.subspan(componentsToStrip);
|
||||
|
||||
updateBuilders(
|
||||
isDir
|
||||
? pathComponents2
|
||||
|
@ -964,7 +1002,8 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
|||
git_tree_entry_filemode(entry));
|
||||
}
|
||||
|
||||
Hash sync() override {
|
||||
Hash sync() override
|
||||
{
|
||||
updateBuilders({});
|
||||
|
||||
auto [oid, _name] = popBuilder();
|
||||
|
|
|
@ -98,6 +98,13 @@ struct GitRepo
|
|||
* serialisation. This is memoised on-disk.
|
||||
*/
|
||||
virtual Hash treeHashToNarHash(const Hash & treeHash) = 0;
|
||||
|
||||
/**
|
||||
* If the specified Git object is a directory with a single entry
|
||||
* that is a directory, return the ID of that nested directory.
|
||||
* Otherwise, return the passed ID unchanged.
|
||||
*/
|
||||
virtual Hash dereferenceSingletonDirectory(const Hash & oid) = 0;
|
||||
};
|
||||
|
||||
ref<GitRepo> getTarballCache();
|
||||
|
|
|
@ -258,13 +258,14 @@ struct GitArchiveInputScheme : InputScheme
|
|||
fmt("unpacking '%s' into the Git cache", input.to_string()));
|
||||
|
||||
TarArchive archive { *source };
|
||||
auto parseSink = getTarballCache()->getFileSystemObjectSink();
|
||||
auto tarballCache = getTarballCache();
|
||||
auto parseSink = tarballCache->getFileSystemObjectSink();
|
||||
auto lastModified = unpackTarfileToSink(archive, *parseSink);
|
||||
|
||||
act.reset();
|
||||
|
||||
TarballInfo tarballInfo {
|
||||
.treeHash = parseSink->sync(),
|
||||
.treeHash = tarballCache->dereferenceSingletonDirectory(parseSink->sync()),
|
||||
.lastModified = lastModified
|
||||
};
|
||||
|
||||
|
|
|
@ -167,7 +167,8 @@ DownloadTarballResult downloadTarball(
|
|||
TarArchive{path};
|
||||
})
|
||||
: TarArchive{*source};
|
||||
auto parseSink = getTarballCache()->getFileSystemObjectSink();
|
||||
auto tarballCache = getTarballCache();
|
||||
auto parseSink = tarballCache->getFileSystemObjectSink();
|
||||
auto lastModified = unpackTarfileToSink(archive, *parseSink);
|
||||
|
||||
act.reset();
|
||||
|
@ -182,7 +183,8 @@ DownloadTarballResult downloadTarball(
|
|||
infoAttrs = cached->value;
|
||||
} else {
|
||||
infoAttrs.insert_or_assign("etag", res->etag);
|
||||
infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
|
||||
infoAttrs.insert_or_assign("treeHash",
|
||||
tarballCache->dereferenceSingletonDirectory(parseSink->sync()).gitRev());
|
||||
infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified));
|
||||
if (res->immutableUrl)
|
||||
infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl);
|
||||
|
|
|
@ -83,3 +83,20 @@ path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .s
|
|||
[[ $(cat "$path/a/zzz") = bar ]]
|
||||
[[ $(cat "$path/c/aap") = bar ]]
|
||||
[[ $(cat "$path/fnord") = bar ]]
|
||||
|
||||
# Test a tarball that has multiple top-level directories.
|
||||
rm -rf "$TEST_ROOT/tar_root"
|
||||
mkdir -p "$TEST_ROOT/tar_root" "$TEST_ROOT/tar_root/foo" "$TEST_ROOT/tar_root/bar"
|
||||
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
|
||||
path="$(nix flake prefetch --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
|
||||
[[ -d "$path/foo" ]]
|
||||
[[ -d "$path/bar" ]]
|
||||
|
||||
# Test a tarball that has a single regular file.
|
||||
rm -rf "$TEST_ROOT/tar_root"
|
||||
mkdir -p "$TEST_ROOT/tar_root"
|
||||
echo bar > "$TEST_ROOT/tar_root/foo"
|
||||
chmod +x "$TEST_ROOT/tar_root/foo"
|
||||
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
|
||||
path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
|
||||
[[ $(cat "$path/foo") = bar ]]
|
||||
|
|
|
@ -77,7 +77,7 @@ TEST_F(GitUtilsTest, sink_basic)
|
|||
|
||||
// sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));
|
||||
|
||||
auto result = sink->sync();
|
||||
auto result = repo->dereferenceSingletonDirectory(sink->sync());
|
||||
auto accessor = repo->getAccessor(result, false);
|
||||
auto entries = accessor->readDirectory(CanonPath::root);
|
||||
ASSERT_EQ(entries.size(), 5);
|
||||
|
@ -103,7 +103,7 @@ TEST_F(GitUtilsTest, sink_hardlink)
|
|||
sink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello"));
|
||||
FAIL() << "Expected an exception";
|
||||
} catch (const nix::Error & e) {
|
||||
ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target"));
|
||||
ASSERT_THAT(e.msg(), testing::HasSubstr("cannot find hard link target"));
|
||||
ASSERT_THAT(e.msg(), testing::HasSubstr("/hello"));
|
||||
ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link"));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue