From bcb5f235f963d3e213c3dbe104be91a9a0a6dd29 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 28 Feb 2024 10:56:07 -0500 Subject: [PATCH] Support symlinks properly with `git-hashing` experimental feature Before, they would not be written to a file `FileSystemObjectSink` correctly. --- src/libutil/git.cc | 75 +++++++++++++++++++------ src/libutil/git.hh | 21 ++++++- tests/functional/git-hashing/simple.sh | 9 +++ tests/unit/libutil/data/git/tree.bin | Bin 100 -> 133 bytes tests/unit/libutil/data/git/tree.txt | 1 + tests/unit/libutil/git.cc | 30 ++++++++-- 6 files changed, 110 insertions(+), 26 deletions(-) diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 5733531fa..0b6e35222 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -56,31 +56,63 @@ void parseBlob( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, - bool executable, + BlobMode blobMode, const ExperimentalFeatureSettings & xpSettings) { xpSettings.require(Xp::GitHashing); - sink.createRegularFile(sinkPath, [&](auto & crf) { - if (executable) - crf.isExecutable(); + unsigned long long size = std::stoi(getStringUntil(source, 0)); - unsigned long long size = std::stoi(getStringUntil(source, 0)); + auto doRegularFile = [&](bool executable) { + sink.createRegularFile(sinkPath, [&](auto & crf) { + if (executable) + crf.isExecutable(); - crf.preallocateContents(size); + crf.preallocateContents(size); - unsigned long long left = size; - std::string buf; - buf.reserve(65536); + unsigned long long left = size; + std::string buf; + buf.reserve(65536); - while (left) { + while (left) { + checkInterrupt(); + buf.resize(std::min((unsigned long long)buf.capacity(), left)); + source(buf); + crf(buf); + left -= buf.size(); + } + }); + }; + + switch (blobMode) { + + case BlobMode::Regular: + doRegularFile(false); + break; + + case BlobMode::Executable: + doRegularFile(true); + break; + + case BlobMode::Symlink: + { + std::string target; + target.resize(size, '0'); + target.reserve(size); + for (size_t n = 0; n < target.size();) { checkInterrupt(); - buf.resize(std::min((unsigned long long)buf.capacity(), left)); - source(buf); - crf(buf); - left -= buf.size(); + n += source.read( + const_cast(target.c_str()) + n, + target.size() - n); } - }); + + sink.createSymlink(sinkPath, target); + break; + } + + default: + assert(false); + } } void parseTree( @@ -142,7 +174,7 @@ void parse( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, - bool executable, + BlobMode rootModeIfBlob, std::function hook, const ExperimentalFeatureSettings & xpSettings) { @@ -152,7 +184,7 @@ void parse( switch (type) { case ObjectType::Blob: - parseBlob(sink, sinkPath, source, executable, xpSettings); + parseBlob(sink, sinkPath, source, rootModeIfBlob, xpSettings); break; case ObjectType::Tree: parseTree(sink, sinkPath, source, hook, xpSettings); @@ -177,7 +209,7 @@ std::optional convertMode(SourceAccessor::Type type) void restore(FileSystemObjectSink & sink, Source & source, std::function hook) { - parse(sink, "", source, false, [&](Path name, TreeEntry entry) { + parse(sink, "", source, BlobMode::Regular, [&](Path name, TreeEntry entry) { auto [accessor, from] = hook(entry.hash); auto stat = accessor->lstat(from); auto gotOpt = convertMode(stat.type); @@ -275,6 +307,13 @@ Mode dump( } case SourceAccessor::tSymlink: + { + auto target = accessor.readLink(path); + dumpBlobPrefix(target.size(), sink, xpSettings); + sink(target); + return Mode::Symlink; + } + case SourceAccessor::tMisc: default: throw Error("file '%1%' has an unsupported type", path); diff --git a/src/libutil/git.hh b/src/libutil/git.hh index d9eb138e1..cfea48fbe 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -75,10 +75,23 @@ ObjectType parseObjectType( Source & source, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); +/** + * These 3 modes are represented by blob objects. + * + * Sometimes we need this information to disambiguate how a blob is + * being used to better match our own "file system object" data model. + */ +enum struct BlobMode : RawMode +{ + Regular = static_cast(Mode::Regular), + Executable = static_cast(Mode::Executable), + Symlink = static_cast(Mode::Symlink), +}; + void parseBlob( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, - bool executable, + BlobMode blobMode, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); void parseTree( @@ -89,11 +102,15 @@ void parseTree( /** * Helper putting the previous three `parse*` functions together. + * + * @rootModeIfBlob How to interpret a root blob, for which there is no + * disambiguating dir entry to answer that questino. If the root it not + * a blob, this is ignored. */ void parse( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, - bool executable, + BlobMode rootModeIfBlob, std::function hook, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); diff --git a/tests/functional/git-hashing/simple.sh b/tests/functional/git-hashing/simple.sh index 74b0220f8..604e1a175 100644 --- a/tests/functional/git-hashing/simple.sh +++ b/tests/functional/git-hashing/simple.sh @@ -56,3 +56,12 @@ echo Run Hello World! > $TEST_ROOT/dummy3/dir/executable path3=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy3) hash3=$(nix-store -q --hash $path3) test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv" + +rm -rf $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3/dir +touch $TEST_ROOT/dummy3/dir/file +ln -s './hello/world.txt' $TEST_ROOT/dummy3/dir/symlink +path3=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy3) +hash3=$(nix-store -q --hash $path3) +test "$hash3" = "sha256:1dwazas8irzpar89s8k2bnp72imfw7kgg4aflhhsfnicg8h428f3" diff --git a/tests/unit/libutil/data/git/tree.bin b/tests/unit/libutil/data/git/tree.bin index 5256ec140702fef5f88bd5750caf7cd57c03e5ac..4ccd43e9a977a6c216f0fad5a15c30aaf20da778 100644 GIT binary patch delta 30 jcmYdkW#lL+N=;QTG%}gU9?NZLWB>#Tg{7qt6AeWHezyoG delta 14 VcmZo=Okpo6N=;R;G@8f}3jiOO1S0?d diff --git a/tests/unit/libutil/data/git/tree.txt b/tests/unit/libutil/data/git/tree.txt index be3d02920..cd40b6a55 100644 --- a/tests/unit/libutil/data/git/tree.txt +++ b/tests/unit/libutil/data/git/tree.txt @@ -1,3 +1,4 @@ 100644 blob 63ddb340119baf8492d2da53af47e8c7cfcd5eb2 Foo 100755 blob 63ddb340119baf8492d2da53af47e8c7cfcd5eb2 bAr 040000 tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 baZ +120000 blob 63ddb340119baf8492d2da53af47e8c7cfcd5eb2 quuX diff --git a/tests/unit/libutil/git.cc b/tests/unit/libutil/git.cc index 76ef86bcf..4f92488d6 100644 --- a/tests/unit/libutil/git.cc +++ b/tests/unit/libutil/git.cc @@ -67,7 +67,7 @@ TEST_F(GitTest, blob_read) { StringSink out; RegularFileSink out2 { out }; ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob); - parseBlob(out2, "", in, false, mockXpSettings); + parseBlob(out2, "", in, BlobMode::Regular, mockXpSettings); auto expected = readFile(goldenMaster("hello-world.bin")); @@ -115,6 +115,15 @@ const static Tree tree = { .hash = Hash::parseAny("4b825dc642cb6eb9a060e54bf8d69288fbee4904", HashAlgorithm::SHA1), }, }, + { + "quuX", + { + .mode = Mode::Symlink, + // hello world with special chars from above (symlink target + // can be anything) + .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1), + }, + }, }; TEST_F(GitTest, tree_read) { @@ -165,6 +174,12 @@ TEST_F(GitTest, both_roundrip) { .contents = "good day,\n\0\n\tworld!", }, }, + { + "quux", + File::Symlink { + .target = "/over/there", + }, + }, }, }, }, @@ -195,21 +210,24 @@ TEST_F(GitTest, both_roundrip) { MemorySink sinkFiles2 { files2 }; - std::function mkSinkHook; - mkSinkHook = [&](auto prefix, auto & hash, auto executable) { + std::function mkSinkHook; + mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) { StringSource in { cas[hash] }; parse( - sinkFiles2, prefix, in, executable, + sinkFiles2, prefix, in, blobMode, [&](const Path & name, const auto & entry) { mkSinkHook( prefix + "/" + name, entry.hash, - entry.mode == Mode::Executable); + // N.B. this cast would not be acceptable in real + // code, because it would make an assert reachable, + // but it should harmless in this test. + static_cast(entry.mode)); }, mockXpSettings); }; - mkSinkHook("", root.hash, false); + mkSinkHook("", root.hash, BlobMode::Regular); ASSERT_EQ(files, files2); }