Revert "Remove dead Git code"

This commit is contained in:
Théophane Hufschmitt 2024-02-27 06:39:30 +01:00 committed by GitHub
parent 4c7f0ef6ca
commit be0052b45f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 699 additions and 0 deletions

View file

@ -5,6 +5,52 @@
namespace nix {
void copyRecursive(
SourceAccessor & accessor, const CanonPath & from,
FileSystemObjectSink & sink, const Path & to)
{
auto stat = accessor.lstat(from);
switch (stat.type) {
case SourceAccessor::tSymlink:
{
sink.createSymlink(to, accessor.readLink(from));
break;
}
case SourceAccessor::tRegular:
{
sink.createRegularFile(to, [&](CreateRegularFileSink & crf) {
if (stat.isExecutable)
crf.isExecutable();
accessor.readFile(from, crf, [&](uint64_t size) {
crf.preallocateContents(size);
});
});
break;
}
case SourceAccessor::tDirectory:
{
sink.createDirectory(to);
for (auto & [name, _] : accessor.readDirectory(from)) {
copyRecursive(
accessor, from / name,
sink, to + "/" + name);
break;
}
break;
}
case SourceAccessor::tMisc:
throw Error("file '%1%' has an unsupported type", from);
default:
abort();
}
}
struct RestoreSinkSettings : Config
{
Setting<bool> preallocateContents{this, false, "preallocate-contents",

View file

@ -41,6 +41,13 @@ struct FileSystemObjectSink
virtual void createSymlink(const Path & path, const std::string & target) = 0;
};
/**
* Recursively copy file system objects from the source into the sink.
*/
void copyRecursive(
SourceAccessor & accessor, const CanonPath & sourcePath,
FileSystemObjectSink & sink, const Path & destPath);
/**
* Ignore everything and do nothing
*/

View file

@ -5,13 +5,302 @@
#include <regex>
#include <strings.h> // for strcasecmp
#include "signals.hh"
#include "config.hh"
#include "hash.hh"
#include "posix-source-accessor.hh"
#include "git.hh"
#include "serialise.hh"
namespace nix::git {
using namespace nix;
using namespace std::string_literals;
std::optional<Mode> decodeMode(RawMode m) {
switch (m) {
case (RawMode) Mode::Directory:
case (RawMode) Mode::Executable:
case (RawMode) Mode::Regular:
case (RawMode) Mode::Symlink:
return (Mode) m;
default:
return std::nullopt;
}
}
static std::string getStringUntil(Source & source, char byte)
{
std::string s;
char n[1];
source(std::string_view { n, 1 });
while (*n != byte) {
s += *n;
source(std::string_view { n, 1 });
}
return s;
}
static std::string getString(Source & source, int n)
{
std::string v;
v.resize(n);
source(v);
return v;
}
void parseBlob(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
bool executable,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
sink.createRegularFile(sinkPath, [&](auto & crf) {
if (executable)
crf.isExecutable();
unsigned long long size = std::stoi(getStringUntil(source, 0));
crf.preallocateContents(size);
unsigned long long left = size;
std::string buf;
buf.reserve(65536);
while (left) {
checkInterrupt();
buf.resize(std::min((unsigned long long)buf.capacity(), left));
source(buf);
crf(buf);
left -= buf.size();
}
});
}
void parseTree(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
unsigned long long size = std::stoi(getStringUntil(source, 0));
unsigned long long left = size;
sink.createDirectory(sinkPath);
while (left) {
std::string perms = getStringUntil(source, ' ');
left -= perms.size();
left -= 1;
RawMode rawMode = std::stoi(perms, 0, 8);
auto modeOpt = decodeMode(rawMode);
if (!modeOpt)
throw Error("Unknown Git permission: %o", perms);
auto mode = std::move(*modeOpt);
std::string name = getStringUntil(source, '\0');
left -= name.size();
left -= 1;
std::string hashs = getString(source, 20);
left -= 20;
Hash hash(HashAlgorithm::SHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);
hook(name, TreeEntry {
.mode = mode,
.hash = hash,
});
}
}
ObjectType parseObjectType(
Source & source,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto type = getString(source, 5);
if (type == "blob ") {
return ObjectType::Blob;
} else if (type == "tree ") {
return ObjectType::Tree;
} else throw Error("input doesn't look like a Git object");
}
void parse(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
bool executable,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto type = parseObjectType(source, xpSettings);
switch (type) {
case ObjectType::Blob:
parseBlob(sink, sinkPath, source, executable, xpSettings);
break;
case ObjectType::Tree:
parseTree(sink, sinkPath, source, hook, xpSettings);
break;
default:
assert(false);
};
}
std::optional<Mode> convertMode(SourceAccessor::Type type)
{
switch (type) {
case SourceAccessor::tSymlink: return Mode::Symlink;
case SourceAccessor::tRegular: return Mode::Regular;
case SourceAccessor::tDirectory: return Mode::Directory;
case SourceAccessor::tMisc: return std::nullopt;
default: abort();
}
}
void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook)
{
parse(sink, "", source, false, [&](Path name, TreeEntry entry) {
auto [accessor, from] = hook(entry.hash);
auto stat = accessor->lstat(from);
auto gotOpt = convertMode(stat.type);
if (!gotOpt)
throw Error("file '%s' (git hash %s) has an unsupported type",
from,
entry.hash.to_string(HashFormat::Base16, false));
auto & got = *gotOpt;
if (got != entry.mode)
throw Error("git mode of file '%s' (git hash %s) is %o but expected %o",
from,
entry.hash.to_string(HashFormat::Base16, false),
(RawMode) got,
(RawMode) entry.mode);
copyRecursive(
*accessor, from,
sink, name);
});
}
void dumpBlobPrefix(
uint64_t size, Sink & sink,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto s = fmt("blob %d\0"s, std::to_string(size));
sink(s);
}
void dumpTree(const Tree & entries, Sink & sink,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
std::string v1;
for (auto & [name, entry] : entries) {
auto name2 = name;
if (entry.mode == Mode::Directory) {
assert(name2.back() == '/');
name2.pop_back();
}
v1 += fmt("%o %s\0"s, static_cast<RawMode>(entry.mode), name2);
std::copy(entry.hash.hash, entry.hash.hash + entry.hash.hashSize, std::back_inserter(v1));
}
{
auto s = fmt("tree %d\0"s, v1.size());
sink(s);
}
sink(v1);
}
Mode dump(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
std::function<DumpHook> hook,
PathFilter & filter,
const ExperimentalFeatureSettings & xpSettings)
{
auto st = accessor.lstat(path);
switch (st.type) {
case SourceAccessor::tRegular:
{
accessor.readFile(path, sink, [&](uint64_t size) {
dumpBlobPrefix(size, sink, xpSettings);
});
return st.isExecutable
? Mode::Executable
: Mode::Regular;
}
case SourceAccessor::tDirectory:
{
Tree entries;
for (auto & [name, _] : accessor.readDirectory(path)) {
auto child = path / name;
if (!filter(child.abs())) continue;
auto entry = hook(child);
auto name2 = name;
if (entry.mode == Mode::Directory)
name2 += "/";
entries.insert_or_assign(std::move(name2), std::move(entry));
}
dumpTree(entries, sink, xpSettings);
return Mode::Directory;
}
case SourceAccessor::tSymlink:
case SourceAccessor::tMisc:
default:
throw Error("file '%1%' has an unsupported type", path);
}
}
TreeEntry dumpHash(
HashAlgorithm ha,
SourceAccessor & accessor, const CanonPath & path, PathFilter & filter)
{
std::function<DumpHook> hook;
hook = [&](const CanonPath & path) -> TreeEntry {
auto hashSink = HashSink(ha);
auto mode = dump(accessor, path, hashSink, hook, filter);
auto hash = hashSink.finish().first;
return {
.mode = mode,
.hash = hash,
};
};
return hook(path);
}
std::optional<LsRemoteRefLine> parseLsRemoteLine(std::string_view line)
{
const static std::regex line_regex("^(ref: *)?([^\\s]+)(?:\\t+(.*))?$");

View file

@ -5,8 +5,160 @@
#include <string_view>
#include <optional>
#include "types.hh"
#include "serialise.hh"
#include "hash.hh"
#include "source-accessor.hh"
#include "fs-sink.hh"
namespace nix::git {
enum struct ObjectType {
Blob,
Tree,
//Commit,
//Tag,
};
using RawMode = uint32_t;
enum struct Mode : RawMode {
Directory = 0040000,
Regular = 0100644,
Executable = 0100755,
Symlink = 0120000,
};
std::optional<Mode> decodeMode(RawMode m);
/**
* An anonymous Git tree object entry (no name part).
*/
struct TreeEntry
{
Mode mode;
Hash hash;
GENERATE_CMP(TreeEntry, me->mode, me->hash);
};
/**
* A Git tree object, fully decoded and stored in memory.
*
* Directory names must end in a `/` for sake of sorting. See
* https://github.com/mirage/irmin/issues/352
*/
using Tree = std::map<std::string, TreeEntry>;
/**
* Callback for processing a child hash with `parse`
*
* The function should
*
* 1. Obtain the file system objects denoted by `gitHash`
*
* 2. Ensure they match `mode`
*
* 3. Feed them into the same sink `parse` was called with
*
* Implementations may seek to memoize resources (bandwidth, storage,
* etc.) for the same Git hash.
*/
using SinkHook = void(const Path & name, TreeEntry entry);
/**
* Parse the "blob " or "tree " prefix.
*
* @throws if prefix not recognized
*/
ObjectType parseObjectType(
Source & source,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
void parseBlob(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
bool executable,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
void parseTree(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Helper putting the previous three `parse*` functions together.
*/
void parse(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
bool executable,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Assists with writing a `SinkHook` step (2).
*/
std::optional<Mode> convertMode(SourceAccessor::Type type);
/**
* Simplified version of `SinkHook` for `restore`.
*
* Given a `Hash`, return a `SourceAccessor` and `CanonPath` pointing to
* the file system object with that path.
*/
using RestoreHook = std::pair<SourceAccessor *, CanonPath>(Hash);
/**
* Wrapper around `parse` and `RestoreSink`
*/
void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook);
/**
* Dumps a single file to a sink
*
* @param xpSettings for testing purposes
*/
void dumpBlobPrefix(
uint64_t size, Sink & sink,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Dumps a representation of a git tree to a sink
*/
void dumpTree(
const Tree & entries, Sink & sink,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Callback for processing a child with `dump`
*
* The function should return the Git hash and mode of the file at the
* given path in the accessor passed to `dump`.
*
* Note that if the child is a directory, its child in must also be so
* processed in order to compute this information.
*/
using DumpHook = TreeEntry(const CanonPath & path);
Mode dump(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
std::function<DumpHook> hook,
PathFilter & filter = defaultPathFilter,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Recursively dumps path, hashing as we go.
*
* A smaller wrapper around `dump`.
*/
TreeEntry dumpHash(
HashAlgorithm ha,
SourceAccessor & accessor, const CanonPath & path,
PathFilter & filter = defaultPathFilter);
/**
* A line from the output of `git ls-remote --symref`.
*

View file

@ -9,6 +9,211 @@ namespace nix {
using namespace git;
class GitTest : public CharacterizationTest
{
Path unitTestData = getUnitTestData() + "/git";
public:
Path goldenMaster(std::string_view testStem) const override {
return unitTestData + "/" + testStem;
}
/**
* We set these in tests rather than the regular globals so we don't have
* to worry about race conditions if the tests run concurrently.
*/
ExperimentalFeatureSettings mockXpSettings;
private:
void SetUp() override
{
mockXpSettings.set("experimental-features", "git-hashing");
}
};
TEST(GitMode, gitMode_directory) {
Mode m = Mode::Directory;
RawMode r = 0040000;
ASSERT_EQ(static_cast<RawMode>(m), r);
ASSERT_EQ(decodeMode(r), std::optional { m });
};
TEST(GitMode, gitMode_executable) {
Mode m = Mode::Executable;
RawMode r = 0100755;
ASSERT_EQ(static_cast<RawMode>(m), r);
ASSERT_EQ(decodeMode(r), std::optional { m });
};
TEST(GitMode, gitMode_regular) {
Mode m = Mode::Regular;
RawMode r = 0100644;
ASSERT_EQ(static_cast<RawMode>(m), r);
ASSERT_EQ(decodeMode(r), std::optional { m });
};
TEST(GitMode, gitMode_symlink) {
Mode m = Mode::Symlink;
RawMode r = 0120000;
ASSERT_EQ(static_cast<RawMode>(m), r);
ASSERT_EQ(decodeMode(r), std::optional { m });
};
TEST_F(GitTest, blob_read) {
readTest("hello-world-blob.bin", [&](const auto & encoded) {
StringSource in { encoded };
StringSink out;
RegularFileSink out2 { out };
ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob);
parseBlob(out2, "", in, false, mockXpSettings);
auto expected = readFile(goldenMaster("hello-world.bin"));
ASSERT_EQ(out.s, expected);
});
}
TEST_F(GitTest, blob_write) {
writeTest("hello-world-blob.bin", [&]() {
auto decoded = readFile(goldenMaster("hello-world.bin"));
StringSink s;
dumpBlobPrefix(decoded.size(), s, mockXpSettings);
s(decoded);
return s.s;
});
}
/**
* This data is for "shallow" tree tests. However, we use "real" hashes
* so that we can check our test data in a small shell script test test
* (`tests/unit/libutil/data/git/check-data.sh`).
*/
const static Tree tree = {
{
"Foo",
{
.mode = Mode::Regular,
// hello world with special chars from above
.hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1),
},
},
{
"bAr",
{
.mode = Mode::Executable,
// ditto
.hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", HashAlgorithm::SHA1),
},
},
{
"baZ/",
{
.mode = Mode::Directory,
// Empty directory hash
.hash = Hash::parseAny("4b825dc642cb6eb9a060e54bf8d69288fbee4904", HashAlgorithm::SHA1),
},
},
};
TEST_F(GitTest, tree_read) {
readTest("tree.bin", [&](const auto & encoded) {
StringSource in { encoded };
NullFileSystemObjectSink out;
Tree got;
ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Tree);
parseTree(out, "", in, [&](auto & name, auto entry) {
auto name2 = name;
if (entry.mode == Mode::Directory)
name2 += '/';
got.insert_or_assign(name2, std::move(entry));
}, mockXpSettings);
ASSERT_EQ(got, tree);
});
}
TEST_F(GitTest, tree_write) {
writeTest("tree.bin", [&]() {
StringSink s;
dumpTree(tree, s, mockXpSettings);
return s.s;
});
}
TEST_F(GitTest, both_roundrip) {
using File = MemorySourceAccessor::File;
MemorySourceAccessor files;
files.root = File::Directory {
.contents {
{
"foo",
File::Regular {
.contents = "hello\n\0\n\tworld!",
},
},
{
"bar",
File::Directory {
.contents = {
{
"baz",
File::Regular {
.executable = true,
.contents = "good day,\n\0\n\tworld!",
},
},
},
},
},
},
};
std::map<Hash, std::string> cas;
std::function<DumpHook> dumpHook;
dumpHook = [&](const CanonPath & path) {
StringSink s;
HashSink hashSink { HashAlgorithm::SHA1 };
TeeSink s2 { s, hashSink };
auto mode = dump(
files, path, s2, dumpHook,
defaultPathFilter, mockXpSettings);
auto hash = hashSink.finish().first;
cas.insert_or_assign(hash, std::move(s.s));
return TreeEntry {
.mode = mode,
.hash = hash,
};
};
auto root = dumpHook(CanonPath::root);
MemorySourceAccessor files2;
MemorySink sinkFiles2 { files2 };
std::function<void(const Path, const Hash &, bool)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto executable) {
StringSource in { cas[hash] };
parse(
sinkFiles2, prefix, in, executable,
[&](const Path & name, const auto & entry) {
mkSinkHook(
prefix + "/" + name,
entry.hash,
entry.mode == Mode::Executable);
},
mockXpSettings);
};
mkSinkHook("", root.hash, false);
ASSERT_EQ(files, files2);
}
TEST(GitLsRemote, parseSymrefLineWithReference) {
auto line = "ref: refs/head/main HEAD";
auto res = parseLsRemoteLine(line);