Merge pull request #11195 from DeterminateSystems/tarball-roots

Improve handling of tarballs that don't consist of a single top-level directory

Author: [**Eelco Dolstra (@edolstra)**](https://github.com/edolstra)
Commit: 0b96c586e0

8 changed files with 107 additions and 32 deletions
doc/manual/rl-next/tarball-fixes.md (new file, 9 additions)

@@ -0,0 +1,9 @@
+---
+synopsis: "Improve handling of tarballs that don't consist of a single top-level directory"
+prs:
+- 11195
+---
+
+In previous Nix releases, the tarball fetcher (used by `builtins.fetchTarball`) erroneously merged top-level directories into a single directory, and silently discarded top-level files that are not directories. This is no longer the case. The new behaviour is that *only* if the tarball consists of a single directory, the top-level path component of the files in the tarball is removed (similar to `tar`'s `--strip-components=1`).
@@ -559,11 +559,11 @@ static RegisterPrimOp primop_fetchTarball({
     .doc = R"(
       Download the specified URL, unpack it and return the path of the
       unpacked tree. The file must be a tape archive (`.tar`) compressed
-      with `gzip`, `bzip2` or `xz`. The top-level path component of the
-      files in the tarball is removed, so it is best if the tarball
-      contains a single directory at top level. The typical use of the
-      function is to obtain external Nix expression dependencies, such as
-      a particular version of Nixpkgs, e.g.
+      with `gzip`, `bzip2` or `xz`. If the tarball consists of a
+      single directory, then the top-level path component of the files
+      in the tarball is removed. The typical use of the function is to
+      obtain external Nix expression dependencies, such as a
+      particular version of Nixpkgs, e.g.

      ```nix
      with import (fetchTarball https://github.com/NixOS/nixpkgs/archive/nixos-14.12.tar.gz) {};
@@ -126,16 +126,39 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
 }

 template<typename T>
-T peelObject(git_repository * repo, git_object * obj, git_object_t type)
+T peelObject(git_object * obj, git_object_t type)
 {
     T obj2;
     if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) {
         auto err = git_error_last();
-        throw Error("peeling Git object '%s': %s", git_object_id(obj), err->message);
+        throw Error("peeling Git object '%s': %s", *git_object_id(obj), err->message);
     }
     return obj2;
 }

+template<typename T>
+T dupObject(typename T::pointer obj)
+{
+    T obj2;
+    if (git_object_dup((git_object * *) (typename T::pointer *) Setter(obj2), (git_object *) obj))
+        throw Error("duplicating object '%s': %s", *git_object_id((git_object *) obj), git_error_last()->message);
+    return obj2;
+}
+
+/**
+ * Peel the specified object (i.e. follow tag and commit objects) to
+ * either a blob or a tree.
+ */
+static Object peelToTreeOrBlob(git_object * obj)
+{
+    /* git_object_peel() doesn't handle blob objects, so handle those
+       specially. */
+    if (git_object_type(obj) == GIT_OBJECT_BLOB)
+        return dupObject<Object>(obj);
+    else
+        return peelObject<Object>(obj, GIT_OBJECT_TREE);
+}
+
 struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
 {
     /** Location of the repository on disk. */
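For readers less familiar with libgit2, here is a minimal standalone sketch of what the new helpers do, written against raw libgit2 calls rather than Nix's `Object`/`Setter`/`Error` wrappers (the `std::runtime_error` error handling and the `main` that peels `HEAD` are illustrative substitutions, not part of the change):

```cpp
// Sketch only: peel a revision's object to a tree, but pass a blob through
// unchanged, since git_object_peel() cannot peel a blob to a tree.
#include <git2.h>
#include <cstdio>
#include <stdexcept>

static git_object * peel_to_tree_or_blob(git_object * obj)
{
    git_object * out = nullptr;
    if (git_object_type(obj) == GIT_OBJECT_BLOB) {
        // Blobs are duplicated as-is (the special case handled above).
        if (git_object_dup(&out, obj))
            throw std::runtime_error(git_error_last()->message);
    } else {
        // Tags and commits are followed until a tree is reached.
        if (git_object_peel(&out, obj, GIT_OBJECT_TREE))
            throw std::runtime_error(git_error_last()->message);
    }
    return out;
}

int main(int argc, char * * argv)
{
    git_libgit2_init();

    git_repository * repo = nullptr;
    if (git_repository_open(&repo, argc > 1 ? argv[1] : "."))
        throw std::runtime_error(git_error_last()->message);

    git_object * head = nullptr;
    if (git_revparse_single(&head, repo, "HEAD"))
        throw std::runtime_error(git_error_last()->message);

    git_object * root = peel_to_tree_or_blob(head);

    char buf[GIT_OID_HEXSZ + 1];
    std::printf("root: %s (%s)\n",
        git_oid_tostr(buf, sizeof buf, git_object_id(root)),
        git_object_type2string(git_object_type(root)));

    git_object_free(root);
    git_object_free(head);
    git_repository_free(repo);
    git_libgit2_shutdown();
    return 0;
}
```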
@@ -166,7 +189,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
         std::unordered_set<git_oid> done;
         std::queue<Commit> todo;

-        todo.push(peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
+        todo.push(peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));

         while (auto commit = pop(todo)) {
             if (!done.insert(*git_commit_id(commit->get())).second) continue;

@@ -184,7 +207,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

     uint64_t getLastModified(const Hash & rev) override
     {
-        auto commit = peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
+        auto commit = peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);

         return git_commit_time(commit.get());
     }
@@ -463,6 +486,23 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
         return narHash;
     }

+    Hash dereferenceSingletonDirectory(const Hash & oid_) override
+    {
+        auto oid = hashToOID(oid_);
+
+        auto _tree = lookupObject(*this, oid, GIT_OBJECT_TREE);
+        auto tree = (const git_tree *) &*_tree;
+
+        if (git_tree_entrycount(tree) == 1) {
+            auto entry = git_tree_entry_byindex(tree, 0);
+            auto mode = git_tree_entry_filemode(entry);
+            if (mode == GIT_FILEMODE_TREE)
+                oid = *git_tree_entry_id(entry);
+        }
+
+        return toHash(oid);
+    }
+
 };

 ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
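The same singleton-directory rule as a hedged standalone sketch in raw libgit2, with the `lookupObject`/`hashToOID`/`toHash` helpers from the diff replaced by a direct `git_tree_lookup` on a `git_oid`:

```cpp
// Sketch only: given a tree OID, return the OID of its sole entry if the tree
// has exactly one entry and that entry is itself a directory; otherwise return
// the OID unchanged. This mirrors dereferenceSingletonDirectory() above.
#include <git2.h>
#include <stdexcept>

static git_oid dereference_singleton_directory(git_repository * repo, git_oid oid)
{
    git_tree * tree = nullptr;
    if (git_tree_lookup(&tree, repo, &oid))
        throw std::runtime_error(git_error_last()->message);

    if (git_tree_entrycount(tree) == 1) {
        const git_tree_entry * entry = git_tree_entry_byindex(tree, 0);
        if (git_tree_entry_filemode(entry) == GIT_FILEMODE_TREE)
            oid = *git_tree_entry_id(entry);  // strip the single top-level directory
    }

    git_tree_free(tree);
    return oid;
}
```

Note that a single top-level *regular file* is deliberately left alone here: that case is handled instead by allowing the accessor root to be a blob, as the `GitSourceAccessor` changes below show.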
@@ -476,11 +516,11 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
 struct GitSourceAccessor : SourceAccessor
 {
     ref<GitRepoImpl> repo;
-    Tree root;
+    Object root;

     GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev)
         : repo(repo_)
-        , root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE))
+        , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
     {
     }
@@ -506,7 +546,7 @@ struct GitSourceAccessor : SourceAccessor
     std::optional<Stat> maybeLstat(const CanonPath & path) override
     {
         if (path.isRoot())
-            return Stat { .type = tDirectory };
+            return Stat { .type = git_object_type(root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular };

         auto entry = lookup(path);
         if (!entry)
@@ -616,10 +656,10 @@ struct GitSourceAccessor : SourceAccessor
     std::optional<Tree> lookupTree(const CanonPath & path)
     {
         if (path.isRoot()) {
-            Tree tree;
-            if (git_tree_dup(Setter(tree), root.get()))
-                throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
-            return tree;
+            if (git_object_type(root.get()) == GIT_OBJECT_TREE)
+                return dupObject<Tree>((git_tree *) &*root);
+            else
+                return std::nullopt;
         }

         auto entry = lookup(path);
@@ -646,10 +686,10 @@ struct GitSourceAccessor : SourceAccessor
     std::variant<Tree, Submodule> getTree(const CanonPath & path)
     {
         if (path.isRoot()) {
-            Tree tree;
-            if (git_tree_dup(Setter(tree), root.get()))
-                throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
-            return tree;
+            if (git_object_type(root.get()) == GIT_OBJECT_TREE)
+                return dupObject<Tree>((git_tree *) &*root);
+            else
+                throw Error("Git root object '%s' is not a directory", *git_object_id(root.get()));
         }

         auto entry = need(path);
@@ -669,6 +709,9 @@ struct GitSourceAccessor : SourceAccessor

     Blob getBlob(const CanonPath & path, bool expectSymlink)
     {
+        if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB)
+            return dupObject<Blob>((git_blob *) &*root);
+
         auto notExpected = [&]()
         {
             throw Error(
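Because the accessor root may now be either a tree or a blob, the tree- and blob-returning paths above both start by dispatching on the root's object type. A hedged standalone sketch of that dispatch in raw libgit2 terms (function names are illustrative, not Nix's API, and the `expectSymlink` handling of the real `getBlob()` is omitted):

```cpp
// Sketch only: dispatch on the type of a root object that may be a tree
// (ordinary tarball) or a blob (tarball containing a single regular file).
#include <git2.h>
#include <optional>
#include <stdexcept>

// lookupTree() analogue: the root as a tree, or std::nullopt if it is a blob.
static std::optional<git_tree *> root_as_tree(git_object * root)
{
    if (git_object_type(root) != GIT_OBJECT_TREE)
        return std::nullopt;
    git_object * dup = nullptr;
    if (git_object_dup(&dup, root))
        throw std::runtime_error(git_error_last()->message);
    return (git_tree *) dup;
}

// getBlob() analogue at the root: the root as a blob, or an error if it is a tree.
static git_blob * root_as_blob(git_object * root)
{
    if (git_object_type(root) != GIT_OBJECT_BLOB)
        throw std::runtime_error("root object is not a regular file");
    git_object * dup = nullptr;
    if (git_object_dup(&dup, root))
        throw std::runtime_error(git_error_last()->message);
    return (git_blob *) dup;
}
```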
@@ -782,8 +825,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink

     std::vector<PendingDir> pendingDirs;

-    size_t componentsToStrip = 1;
-
     void pushBuilder(std::string name)
     {
         git_treebuilder * b;
@@ -839,9 +880,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
     {
         std::span<const std::string> pathComponents2{pathComponents};

-        if (pathComponents2.size() <= componentsToStrip) return false;
-        pathComponents2 = pathComponents2.subspan(componentsToStrip);
-
         updateBuilders(
             isDir
             ? pathComponents2
@@ -964,7 +1002,8 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
             git_tree_entry_filemode(entry));
     }

-    Hash sync() override {
+    Hash sync() override
+    {
         updateBuilders({});

         auto [oid, _name] = popBuilder();
@@ -98,6 +98,13 @@ struct GitRepo
      * serialisation. This is memoised on-disk.
      */
     virtual Hash treeHashToNarHash(const Hash & treeHash) = 0;
+
+    /**
+     * If the specified Git object is a directory with a single entry
+     * that is a directory, return the ID of that object.
+     * Otherwise, return the passed ID unchanged.
+     */
+    virtual Hash dereferenceSingletonDirectory(const Hash & oid) = 0;
 };

 ref<GitRepo> getTarballCache();
@@ -258,13 +258,14 @@ struct GitArchiveInputScheme : InputScheme
             fmt("unpacking '%s' into the Git cache", input.to_string()));

         TarArchive archive { *source };
-        auto parseSink = getTarballCache()->getFileSystemObjectSink();
+        auto tarballCache = getTarballCache();
+        auto parseSink = tarballCache->getFileSystemObjectSink();
         auto lastModified = unpackTarfileToSink(archive, *parseSink);

         act.reset();

         TarballInfo tarballInfo {
-            .treeHash = parseSink->sync(),
+            .treeHash = tarballCache->dereferenceSingletonDirectory(parseSink->sync()),
             .lastModified = lastModified
         };
@@ -167,7 +167,8 @@ DownloadTarballResult downloadTarball(
             TarArchive{path};
         })
         : TarArchive{*source};
-    auto parseSink = getTarballCache()->getFileSystemObjectSink();
+    auto tarballCache = getTarballCache();
+    auto parseSink = tarballCache->getFileSystemObjectSink();
     auto lastModified = unpackTarfileToSink(archive, *parseSink);

     act.reset();
@@ -182,7 +183,8 @@ DownloadTarballResult downloadTarball(
         infoAttrs = cached->value;
     } else {
         infoAttrs.insert_or_assign("etag", res->etag);
-        infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
+        infoAttrs.insert_or_assign("treeHash",
+            tarballCache->dereferenceSingletonDirectory(parseSink->sync()).gitRev());
         infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified));
         if (res->immutableUrl)
             infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl);
@@ -83,3 +83,20 @@ path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .s
 [[ $(cat "$path/a/zzz") = bar ]]
 [[ $(cat "$path/c/aap") = bar ]]
 [[ $(cat "$path/fnord") = bar ]]
+
+# Test a tarball that has multiple top-level directories.
+rm -rf "$TEST_ROOT/tar_root"
+mkdir -p "$TEST_ROOT/tar_root" "$TEST_ROOT/tar_root/foo" "$TEST_ROOT/tar_root/bar"
+tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
+path="$(nix flake prefetch --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
+[[ -d "$path/foo" ]]
+[[ -d "$path/bar" ]]
+
+# Test a tarball that has a single regular file.
+rm -rf "$TEST_ROOT/tar_root"
+mkdir -p "$TEST_ROOT/tar_root"
+echo bar > "$TEST_ROOT/tar_root/foo"
+chmod +x "$TEST_ROOT/tar_root/foo"
+tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
+path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
+[[ $(cat "$path/foo") = bar ]]
@@ -77,7 +77,7 @@ TEST_F(GitUtilsTest, sink_basic)

     // sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));

-    auto result = sink->sync();
+    auto result = repo->dereferenceSingletonDirectory(sink->sync());
     auto accessor = repo->getAccessor(result, false);
     auto entries = accessor->readDirectory(CanonPath::root);
     ASSERT_EQ(entries.size(), 5);
@@ -103,7 +103,7 @@ TEST_F(GitUtilsTest, sink_hardlink)
         sink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello"));
         FAIL() << "Expected an exception";
     } catch (const nix::Error & e) {
-        ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target"));
+        ASSERT_THAT(e.msg(), testing::HasSubstr("cannot find hard link target"));
         ASSERT_THAT(e.msg(), testing::HasSubstr("/hello"));
         ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link"));
     }