Add Git object hashing to the store layer

Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
This commit is contained in:
John Ericson 2023-09-04 09:51:23 -04:00
parent 04836c73e5
commit 201551c937
27 changed files with 484 additions and 86 deletions

View file

@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \ makefiles += \
tests/functional/local.mk \ tests/functional/local.mk \
tests/functional/ca/local.mk \ tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \ tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \ tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk tests/functional/plugins/local.mk

View file

@ -89,15 +89,20 @@ where
- `rec` = one of: - `rec` = one of:
- ```ebnf
| ""
```
(empty string) for hashes of the flat (single file) serialization
- ```ebnf - ```ebnf
| "r:" | "r:"
``` ```
hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization
- ```ebnf - ```ebnf
| "" | "git:"
``` ```
(empty string) for hashes of the flat (single file) serialization hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format
- ```ebnf - ```ebnf
algo = "md5" | "sha1" | "sha256" algo = "md5" | "sha1" | "sha256"

View file

@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath( Hash h = hashPath(
accessor, canonPath, accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) { } catch (Error & e) {

View file

@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) { auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive; if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat; else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") { else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations); experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {}; ingestionMethod = TextIngestionMethod {};
} else } else

View file

@ -324,6 +324,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = dump2.s; nar = dump2.s;
break; break;
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a // The dump is Flat, so we need to convert it to NAR with a
// single file. // single file.
StringSink s; StringSink s;
@ -331,6 +332,10 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s); nar = std::move(s.s);
break; break;
} }
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else { } else {
// Otherwise, we have to do the same hashing as NAR so our single // Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes. // hash will suffice for both purposes.
@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */ implementation of this method in terms of addToStoreFromDump. */
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter); accessor.dumpPath(path, sink, filter);

View file

@ -147,7 +147,7 @@ public:
void narFromPath(const StorePath & path, Sink & sink) override; void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
void addSignatures(const StorePath & storePath, const StringSet & sigs) override; void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

View file

@ -8,6 +8,7 @@
#include "finally.hh" #include "finally.hh"
#include "util.hh" #include "util.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "compression.hh" #include "compression.hh"
#include "daemon.hh" #include "daemon.hh"
#include "topo-sort.hh" #include "topo-sort.hh"
@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites); rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */ /* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() }; std::string oldHashPart { scratchPath->hashPart() };
auto got = ({ auto got = [&]{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
dumpPath( auto fim = outputHash.method.getFileIngestionMethod();
accessor, CanonPath { actualPath }, switch (fim) {
caSink, case FileIngestionMethod::Flat:
outputHash.method.getFileIngestionMethod()); case FileIngestionMethod::Recursive:
caSink.finish().first; {
}); HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();
ValidPathInfo newInfo0 { ValidPathInfo newInfo0 {
worker.store, worker.store,
@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath( HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath }, accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first; newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second; newInfo0.narSize = narHashAndSize.second;
} }
@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath( HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath }, accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first }; ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second; newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs(); auto refs = rewriteRefs();

View file

@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path))) if (!pathExists(store.printStorePath(path)))
res = false; res = false;
else { else {
HashResult current = hashPath( Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) }, *store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo); FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256); Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first; res = info->narHash == nullHash || info->narHash == current;
} }
pathContentsGoodCache.insert_or_assign(path, res); pathContentsGoodCache.insert_or_assign(path, res);
if (!res) if (!res)

View file

@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return ""; return "";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "r:"; return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default: default:
throw Error("impossible, caught both cases"); throw Error("impossible, caught both cases");
} }
@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) { if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive; return FileIngestionMethod::Recursive;
} }
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) { else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {}; return TextIngestionMethod {};
} }
@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat; auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:")) if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive; method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_(); HashAlgorithm hashAlgo = parseHashAlgorithm_();
return { return {
std::move(method), std::move(method),

View file

@ -13,6 +13,7 @@
#include "archive.hh" #include "archive.hh"
#include "derivations.hh" #include "derivations.hh"
#include "args.hh" #include "args.hh"
#include "git.hh"
namespace nix::daemon { namespace nix::daemon {
@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved); TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */ NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource); parseDump(sink, savedNARSource);
} else { } else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the /* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into metadata, and streaming the sole file we expect into
`saved`. */ `saved`. */
RegularFileSink savedRegular { saved }; RegularFileSink savedRegular { saved };
parseDump(savedRegular, from); parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected"); if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
} }
}); });
logger->startWork(); logger->startWork();

View file

@ -43,7 +43,7 @@ public:
LocalFSStore(const Params & params); LocalFSStore(const Params & params);
void narFromPath(const StorePath & path, Sink & sink) override; void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
/** /**
* Creates symlink from the `gcRoot` to the `storePath` and * Creates symlink from the `gcRoot` to the `storePath` and

View file

@ -1,5 +1,6 @@
#include "local-store.hh" #include "local-store.hh"
#include "globals.hh" #include "globals.hh"
#include "git.hh"
#include "archive.hh" #include "archive.hh"
#include "pathlocks.hh" #include "pathlocks.hh"
#include "worker-protocol.hh" #include "worker-protocol.hh"
@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
if (info.ca) { if (info.ca) {
auto & specified = *info.ca; auto & specified = *info.ca;
auto actualHash = ({ auto actualHash = ({
HashModuloSink caSink { auto accessor = getFSAccessor(false);
specified.hash.algo, CanonPath path { printStorePath(info.path) };
std::string { info.path.hashPart() }, Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
}; auto fim = specified.method.getFileIngestionMethod();
PosixSourceAccessor accessor; switch (fim) {
dumpPath( case FileIngestionMethod::Flat:
*getFSAccessor(false), case FileIngestionMethod::Recursive:
CanonPath { printStorePath(info.path) }, {
caSink, HashModuloSink caSink {
specified.method.getFileIngestionMethod()); specified.hash.algo,
std::string { info.path.hashPart() },
};
dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim);
h = caSink.finish().first;
break;
}
case FileIngestionMethod::Git:
h = git::dumpHash(specified.hash.algo, *accessor, path).hash;
break;
}
ContentAddress { ContentAddress {
.method = specified.method, .method = specified.method,
.hash = caSink.finish().first, .hash = std::move(h),
}; };
}); });
if (specified.hash != actualHash.hash) { if (specified.hash != actualHash.hash) {
@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir); delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x"; tempPath = tempDir + "/x";
restorePath(tempPath, bothSource, method.getFileIngestionMethod()); auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
dumpBuffer.reset(); dumpBuffer.reset();
dump = {}; dump = {};
@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump(
if (inMemory) { if (inMemory) {
StringSource dumpSource { dump }; StringSource dumpSource { dump };
/* Restore from the buffer in memory. */ /* Restore from the buffer in memory. */
restorePath(realPath, dumpSource, method.getFileIngestionMethod()); auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
} else { } else {
/* Move the temporary path we restored above. */ /* Move the temporary path we restored above. */
moveFile(tempPath, realPath); moveFile(tempPath, realPath);
@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
std::string hash = hashPath( std::string hash = hashPath(
accessor, CanonPath { linkPath }, accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false);
if (hash != link.name) { if (hash != link.name) {
printError("link '%s' was modified! expected hash '%s', got '%s'", printError("link '%s' was modified! expected hash '%s', got '%s'",
linkPath, link.name, hash); linkPath, link.name, hash);

View file

@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
hashPath( hashPath(
accessor, CanonPath { path }, accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
}); });
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true)); debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));
@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
hashPath( hashPath(
accessor, CanonPath { linkPath }, accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
}))) })))
{ {
// XXX: Consider overwriting linkPath with our valid version. // XXX: Consider overwriting linkPath with our valid version.

View file

@ -13,6 +13,7 @@
#include "derivations.hh" #include "derivations.hh"
#include "pool.hh" #include "pool.hh"
#include "finally.hh" #include "finally.hh"
#include "git.hh"
#include "logging.hh" #include "logging.hh"
#include "callback.hh" #include "callback.hh"
#include "filetransfer.hh" #include "filetransfer.hh"

View file

@ -184,7 +184,7 @@ protected:
friend struct ConnectionHandle; friend struct ConnectionHandle;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
virtual void narFromPath(const StorePath & path, Sink & sink) override; virtual void narFromPath(const StorePath & path, Sink & sink) override;

View file

@ -12,7 +12,9 @@
#include "references.hh" #include "references.hh"
#include "archive.hh" #include "archive.hh"
#include "callback.hh" #include "callback.hh"
#include "git.hh"
#include "remote-store.hh" #include "remote-store.hh"
#include "posix-source-accessor.hh"
// FIXME this should not be here, see TODO below on // FIXME this should not be here, see TODO below on
// `addMultipleToStore`. // `addMultipleToStore`.
#include "worker-protocol.hh" #include "worker-protocol.hh"
@ -119,6 +121,9 @@ static std::string makeType(
StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{ {
if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
throw Error("Git file ingestion must use SHA-1 hash");
if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) { if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name); return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
} else { } else {
@ -166,7 +171,7 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath(
const StorePathSet & references, const StorePathSet & references,
PathFilter & filter) const PathFilter & filter) const
{ {
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
return { return {
makeFixedOutputPathFromCA( makeFixedOutputPathFromCA(
name, name,
@ -193,7 +198,37 @@ StorePath Store::addToStore(
RepairFlag repair) RepairFlag repair)
{ {
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter); auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter);
break;
}
case FileIngestionMethod::Git:
{
git::dump(
accessor, path,
sink,
// recursively add to store if path is a directory
[&](const CanonPath & path) -> git::TreeEntry {
auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair);
auto info = queryPathInfo(storePath);
assert(info->ca);
assert(info->ca->method == FileIngestionMethod::Git);
auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath)));
auto gitModeOpt = git::convertMode(stat.type);
assert(gitModeOpt);
return {
.mode = *gitModeOpt,
.hash = info->ca->hash,
};
},
filter);
break;
}
}
}); });
return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); return addToStoreFromDump(*source, name, method, hashAlgo, references, repair);
} }
@ -355,9 +390,7 @@ ValidPathInfo Store::addToStoreSlow(
NullFileSystemObjectSink blank; NullFileSystemObjectSink blank;
auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat
? (FileSystemObjectSink &) fileSink ? (FileSystemObjectSink &) fileSink
: method.getFileIngestionMethod() == FileIngestionMethod::Recursive : (FileSystemObjectSink &) blank; // for recursive or git we do recursive
? (FileSystemObjectSink &) blank
: (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases
/* The information that flows from tapped (besides being replicated in /* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */ narSink), is now put in parseSink. */
@ -369,6 +402,8 @@ ValidPathInfo Store::addToStoreSlow(
auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256 auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256
? narHash ? narHash
: method == FileIngestionMethod::Git
? git::dumpHash(hashAlgo, accessor, srcPath).hash
: caHashSink.finish().first; : caHashSink.finish().first;
if (expectedCAHash && expectedCAHash != hash) if (expectedCAHash && expectedCAHash != hash)

View file

@ -35,7 +35,7 @@ public:
static std::set<std::string> uriSchemes() static std::set<std::string> uriSchemes()
{ return {"unix"}; } { return {"unix"}; }
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override
{ return LocalFSStore::getFSAccessor(requireValidPath); } { return LocalFSStore::getFSAccessor(requireValidPath); }
void narFromPath(const StorePath & path, Sink & sink) override void narFromPath(const StorePath & path, Sink & sink) override

View file

@ -1,16 +1,53 @@
#include "file-content-address.hh" #include "file-content-address.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
namespace nix { namespace nix {
FileIngestionMethod parseFileIngestionMethod(std::string_view input) static std::optional<FileSerialisationMethod> parseFileSerialisationMethodOpt(std::string_view input)
{ {
if (input == "flat") { if (input == "flat") {
return FileIngestionMethod::Flat; return FileSerialisationMethod::Flat;
} else if (input == "nar") { } else if (input == "nar") {
return FileIngestionMethod::Recursive; return FileSerialisationMethod::Recursive;
} else { } else {
throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`"); return std::nullopt;
}
}
/**
 * Parse a `FileSerialisationMethod` from its textual name (`flat` or
 * `nar`), delegating the actual matching to
 * `parseFileSerialisationMethodOpt`.
 *
 * @throws UsageError if `input` is not one of the recognised names.
 */
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input)
{
    auto ret = parseFileSerialisationMethodOpt(input);
    if (ret)
        return *ret;
    else
        // Pass `input` so the `%s` placeholder is actually filled in
        // and the user can see which value was rejected.
        throw UsageError("Unknown file serialiation method '%s', expect `flat` or `nar`", input);
}
/**
 * Parse a `FileIngestionMethod` from its textual name.
 *
 * `git` is handled directly; any other name is tried as a
 * `FileSerialisationMethod` name via `parseFileSerialisationMethodOpt`
 * and the result is converted to the ingestion method with the same
 * enumerator value.
 *
 * @throws UsageError if `input` is none of `flat`, `nar`, or `git`.
 */
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
{
    if (input == "git") {
        return FileIngestionMethod::Git;
    } else {
        auto ret = parseFileSerialisationMethodOpt(input);
        if (ret)
            return static_cast<FileIngestionMethod>(*ret);
        else
            // Pass `input` so the `%s` placeholder is actually filled
            // in and the user can see which value was rejected.
            throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`", input);
    }
}
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method)
{
switch (method) {
case FileSerialisationMethod::Flat:
return "flat";
case FileSerialisationMethod::Recursive:
return "nar";
default:
assert(false);
} }
} }
@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
return "flat";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "nar"; return renderFileSerialisationMethod(
static_cast<FileSerialisationMethod>(method));
case FileIngestionMethod::Git:
return "git";
default: default:
abort(); abort();
} }
@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
void dumpPath( void dumpPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
Sink & sink, Sink & sink,
FileIngestionMethod method, FileSerialisationMethod method,
PathFilter & filter) PathFilter & filter)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileSerialisationMethod::Flat:
accessor.readFile(path, sink); accessor.readFile(path, sink);
break; break;
case FileIngestionMethod::Recursive: case FileSerialisationMethod::Recursive:
accessor.dumpPath(path, sink, filter); accessor.dumpPath(path, sink, filter);
break; break;
} }
@ -48,13 +87,13 @@ void dumpPath(
void restorePath( void restorePath(
const Path & path, const Path & path,
Source & source, Source & source,
FileIngestionMethod method) FileSerialisationMethod method)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileSerialisationMethod::Flat:
writeFile(path, source); writeFile(path, source);
break; break;
case FileIngestionMethod::Recursive: case FileSerialisationMethod::Recursive:
restorePath(path, source); restorePath(path, source);
break; break;
} }
@ -63,7 +102,7 @@ void restorePath(
HashResult hashPath( HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha, FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter) PathFilter & filter)
{ {
HashSink sink { ha }; HashSink sink { ha };
@ -71,4 +110,20 @@ HashResult hashPath(
return sink.finish(); return sink.finish();
} }
/**
 * Compute the hash of the file system object at `path` according to
 * an arbitrary `FileIngestionMethod`.
 *
 * `Flat` and `Recursive` are forwarded to the `FileSerialisationMethod`
 * overload of `hashPath`, keeping only the hash half of its result;
 * `Git` is forwarded to `git::dumpHash`.
 */
Hash hashPath(
    SourceAccessor & accessor, const CanonPath & path,
    FileIngestionMethod method, HashAlgorithm ht,
    PathFilter & filter)
{
    switch (method) {
    case FileIngestionMethod::Flat:
    case FileIngestionMethod::Recursive:
        return hashPath(
            accessor, path,
            static_cast<FileSerialisationMethod>(method),
            ht, filter).first;
    case FileIngestionMethod::Git:
        return git::dumpHash(ht, accessor, path, filter).hash;
    }
    // Every enumerator returns above. An out-of-range value must not be
    // allowed to run off the end of a non-void function (undefined
    // behaviour), so fail loudly like the other switches in this file.
    abort();
}
} }

View file

@ -8,37 +8,38 @@
namespace nix { namespace nix {
/** /**
* An enumeration of the main ways we can serialize file system * An enumeration of the ways we can serialize file system
* objects. * objects.
*/ */
enum struct FileIngestionMethod : uint8_t { enum struct FileSerialisationMethod : uint8_t {
/** /**
* Flat-file hashing. Directly ingest the contents of a single file * Flat-file. The contents of a single file exactly.
*/ */
Flat = 0, Flat,
/** /**
* Recursive (or NAR) hashing. Serializes the file-system object in * Nix Archive. Serializes the file-system object in
* Nix Archive format and ingest that. * Nix Archive format.
*/ */
Recursive = 1, Recursive,
}; };
/** /**
* Parse a `FileIngestionMethod` by name. Choice of: * Parse a `FileSerialisationMethod` by name. Choice of:
* *
* - `flat`: `FileIngestionMethod::Flat` * - `flat`: `FileSerialisationMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive` * - `nar`: `FileSerialisationMethod::Recursive`
* *
* Oppostite of `renderFileIngestionMethod`. * Opposite of `renderFileSerialisationMethod`.
*/ */
FileIngestionMethod parseFileIngestionMethod(std::string_view input); FileSerialisationMethod parseFileSerialisationMethod(std::string_view input);
/** /**
* Render a `FileIngestionMethod` by name. * Render a `FileSerialisationMethod` by name.
* *
* Oppostite of `parseFileIngestionMethod`. * Opposite of `parseFileSerialisationMethod`.
*/ */
std::string_view renderFileIngestionMethod(FileIngestionMethod method); std::string_view renderFileSerialisationMethod(FileSerialisationMethod method);
/** /**
* Dump a serialization of the given file system object. * Dump a serialization of the given file system object.
@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method);
void dumpPath( void dumpPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
Sink & sink, Sink & sink,
FileIngestionMethod method, FileSerialisationMethod method,
PathFilter & filter = defaultPathFilter); PathFilter & filter = defaultPathFilter);
/** /**
* Restore a serialization of the given file system object. * Restore a serialisation of the given file system object.
* *
* @TODO use an arbitrary `FileSystemObjectSink`. * @TODO use an arbitrary `FileSystemObjectSink`.
*/ */
void restorePath( void restorePath(
const Path & path, const Path & path,
Source & source, Source & source,
FileIngestionMethod method); FileSerialisationMethod method);
/** /**
* Compute the hash of the given file system object according to the * Compute the hash of the given file system object according to the
* given method. * given method.
* *
* The hash is defined as (essentially) hashString(ha, dumpPath(path)). * the hash is defined as (in pseudocode):
*
* ```
* hashString(ha, dumpPath(...))
* ```
*/ */
HashResult hashPath( HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);
/**
* An enumeration of the ways we can ingest file system
* objects, producing a hash or digest.
*/
enum struct FileIngestionMethod : uint8_t {
/**
* Hash `FileSerialisationMethod::Flat` serialisation.
*/
Flat,
/**
* Hash `FileSerialisationMethod::Recursive` serialisation.
*/
Recursive,
/**
* Git hashing. In particular files are hashed as git "blobs", and
* directories are hashed as git "trees".
*
* Unlike `Flat` and `Recursive`, this is not a hash of a single
* serialisation but a [Merkle
* DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple
* rounds of serialisation and hashing.
*
* @note Git's data model is slightly different, in that a plain
* file doesn't have an executable bit, directory entries do
* instead. We decide to treat a bare file as non-executable by fiat,
* as we do with `FileIngestionMethod::Flat` which also lacks this
* information. Thus, Git can encode some but not all of Nix's "File
* System Objects", and this sort of hashing is likewise partial.
*/
Git,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `git`: `FileIngestionMethod::Git`
*
* Opposite of `renderFileIngestionMethod`.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
*
* Opposite of `parseFileIngestionMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* Unlike the other `hashPath`, this works on an arbitrary
* `FileIngestionMethod` instead of `FileSerialisationMethod`, but
* doesn't return the size, as the size is not both simply and
* usefully defined for a Merkle format.
*/
Hash hashPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha, FileIngestionMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter); PathFilter & filter = defaultPathFilter);

View file

@ -555,7 +555,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise)
HashResult hash = hashPath( HashResult hash = hashPath(
*store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) }, *store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
info->narHash = hash.first; info->narHash = hash.first;
info->narSize = hash.second; info->narSize = hash.second;
} }

View file

@ -2,6 +2,7 @@
#include "common-args.hh" #include "common-args.hh"
#include "store-api.hh" #include "store-api.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh" #include "posix-source-accessor.hh"
#include "misc-store-flags.hh" #include "misc-store-flags.hh"

View file

@ -5,6 +5,7 @@
#include "shared.hh" #include "shared.hh"
#include "references.hh" #include "references.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh" #include "posix-source-accessor.hh"
#include "misc-store-flags.hh" #include "misc-store-flags.hh"
@ -66,9 +67,11 @@ struct CmdHashBase : Command
{ {
switch (mode) { switch (mode) {
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
return "print cryptographic hash of a regular file"; return "print cryptographic hash of a regular file";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "print cryptographic hash of the NAR serialisation of a path"; return "print cryptographic hash of the NAR serialisation of a path";
case FileIngestionMethod::Git:
return "print cryptographic hash of the Git serialisation of a path";
default: default:
assert(false); assert(false);
}; };
@ -77,17 +80,41 @@ struct CmdHashBase : Command
void run() override void run() override
{ {
for (auto path : paths) { for (auto path : paths) {
auto makeSink = [&]() -> std::unique_ptr<AbstractHashSink> {
if (modulus)
return std::make_unique<HashModuloSink>(hashAlgo, *modulus);
else
return std::make_unique<HashSink>(hashAlgo);
};
std::unique_ptr<AbstractHashSink> hashSink; auto [accessor_, canonPath] = PosixSourceAccessor::createAtRoot(path);
if (modulus) auto & accessor = accessor_;
hashSink = std::make_unique<HashModuloSink>(hashAlgo, *modulus); Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
else switch (mode) {
hashSink = std::make_unique<HashSink>(hashAlgo); case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
auto hashSink = makeSink();
dumpPath(accessor, canonPath, *hashSink, (FileSerialisationMethod) mode);
h = hashSink->finish().first;
break;
}
case FileIngestionMethod::Git: {
std::function<git::DumpHook> hook;
hook = [&](const CanonPath & path) -> git::TreeEntry {
auto hashSink = makeSink();
auto mode = dump(accessor, path, *hashSink, hook);
auto hash = hashSink->finish().first;
return {
.mode = mode,
.hash = hash,
};
};
h = hook(canonPath).hash;
break;
}
}
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
dumpPath(accessor, canonPath, *hashSink, mode);
Hash h = hashSink->finish().first;
if (truncate && h.hashSize > 20) h = compressHash(h, 20); if (truncate && h.hashSize > 20) h = compressHash(h, 20);
logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI)); logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI));
} }

View file

@ -0,0 +1,11 @@
# Shared setup for the git-hashing functional tests: pulls in the parent
# directory's common.sh, then prepares the test environment.
source ../common.sh
# Reset store and cache state (helpers presumably defined by ../common.sh).
clearStore
clearCache
# Need backend to support git-hashing too
requireDaemonNewerThan "2.18.0pre20230908"
# Enable the `git-hashing` feature, then restart the daemon
# (presumably so the daemon picks up the new setting -- confirm).
enableFeatures "git-hashing"
restartDaemon

View file

@ -0,0 +1,7 @@
# Test scripts that make up the `git-hashing` test group.
git-hashing-tests := \
$(d)/simple.sh
# Add the group to the list of test groups.
install-tests-groups += git-hashing
# NOTE(review): config.nix is presumably generated into this directory
# during the test run -- confirm; it is listed here for cleanup.
clean-files += \
$(d)/config.nix

View file

@ -0,0 +1,58 @@
source common.sh
# Scratch Git repository used to cross-check the hashes printed by Nix
# against the object IDs computed by Git itself.
repo="$TEST_ROOT/scratch"
git init "$repo"
# Configure a committer identity local to the scratch repository
# (presumably so `git commit` works without any global Git config).
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
# Assert that the expected hash "$1" equals both
#   1. the git-mode SHA-1 that Nix computes for "$TEST_ROOT/hash-path", and
#   2. the object ID Git assigns to the same content once it has been
#      committed into the scratch repository "$repo".
#
# Arguments:
#   $1  expected hash, base16
try () {
    local expected="$1"
    local nixHash gitHash

    nixHash=$(nix hash path --mode git --format base16 --algo sha1 "$TEST_ROOT/hash-path")
    [[ "$nixHash" == "$expected" ]]

    # Drop whatever an earlier call committed under the same name; on the
    # first call there is nothing to remove, hence the `|| true`.
    git -C "$repo" rm -rf hash-path || true
    cp -r "$TEST_ROOT/hash-path" "$repo/hash-path"
    git -C "$repo" add hash-path
    git -C "$repo" commit -m "x"
    git -C "$repo" status

    # `HEAD:hash-path` names the blob or tree object stored at that path.
    gitHash=$(git -C "$repo" rev-parse HEAD:hash-path)
    [[ "$gitHash" = "$expected" ]]
}
# --- `nix hash path --mode git` must agree with Git itself ---------------

# blob
rm -rf "$TEST_ROOT/hash-path"
echo "Hello World" > "$TEST_ROOT/hash-path"
try "557db03de997c86a4a028e1ebd3a1ceb225be238"

# tree with children
rm -rf "$TEST_ROOT/hash-path"
mkdir "$TEST_ROOT/hash-path"
echo "Hello World" > "$TEST_ROOT/hash-path/hello"
echo "Run Hello World" > "$TEST_ROOT/hash-path/executable"
chmod +x "$TEST_ROOT/hash-path/executable"
try "e5c0a11a556801a5c9dcf330ca9d7e2c572697f4"

# --- `nix store add --mode git`: check the hash reported for each path ---

# single regular file
rm -rf "$TEST_ROOT/dummy1"
echo 'Hello World!' > "$TEST_ROOT/dummy1"
path1=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy1")
hash1=$(nix-store -q --hash "$path1")
test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v"

# directory containing one file
rm -rf "$TEST_ROOT/dummy2"
mkdir -p "$TEST_ROOT/dummy2"
echo 'Hello World!' > "$TEST_ROOT/dummy2/hello"
path2=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy2")
hash2=$(nix-store -q --hash "$path2")
test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0"

# nested directory with a regular file and an executable file
rm -rf "$TEST_ROOT/dummy3"
mkdir -p "$TEST_ROOT/dummy3"
mkdir -p "$TEST_ROOT/dummy3/dir"
touch "$TEST_ROOT/dummy3/dir/file"
echo 'Hello World!' > "$TEST_ROOT/dummy3/dir/file"
touch "$TEST_ROOT/dummy3/dir/executable"
chmod +x "$TEST_ROOT/dummy3/dir/executable"
echo 'Run Hello World!' > "$TEST_ROOT/dummy3/dir/executable"
path3=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy3")
hash3=$(nix-store -q --hash "$path3")
test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv"

View file

@ -13,6 +13,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_1) {
ContentAddressMethod { TextIngestionMethod {} }, ContentAddressMethod { TextIngestionMethod {} },
ContentAddressMethod { FileIngestionMethod::Flat }, ContentAddressMethod { FileIngestionMethod::Flat },
ContentAddressMethod { FileIngestionMethod::Recursive }, ContentAddressMethod { FileIngestionMethod::Recursive },
ContentAddressMethod { FileIngestionMethod::Git },
}) { }) {
EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam); EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam);
} }
@ -23,6 +24,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_2) {
"text", "text",
"flat", "flat",
"nar", "nar",
"git",
}) { }) {
EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS); EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS);
} }

View file

@ -4,6 +4,32 @@
namespace nix { namespace nix {
/* ----------------------------------------------------------------------------
* parseFileSerialisationMethod, renderFileSerialisationMethod
* --------------------------------------------------------------------------*/
/**
 * Round trip, value first: rendering a method and parsing the rendered
 * name must give back the method we started from.
 */
TEST(FileSerialisationMethod, testRoundTripPrintParse_1) {
    const FileSerialisationMethod allMethods[] = {
        FileSerialisationMethod::Flat,
        FileSerialisationMethod::Recursive,
    };

    for (const auto method : allMethods) {
        const auto rendered = renderFileSerialisationMethod(method);
        EXPECT_EQ(parseFileSerialisationMethod(rendered), method);
    }
}
/**
 * Round trip, name first: parsing a method name and rendering the parsed
 * method must give back the name we started from.
 */
TEST(FileSerialisationMethod, testRoundTripPrintParse_2) {
    const std::string_view allNames[] = {
        "flat",
        "nar",
    };

    for (const auto name : allNames) {
        const auto parsed = parseFileSerialisationMethod(name);
        EXPECT_EQ(renderFileSerialisationMethod(parsed), name);
    }
}
/**
 * A string that is not a known method name ("narwhal") is expected to
 * make `parseFileSerialisationMethod` throw `UsageError`.
 */
TEST(FileSerialisationMethod, testParseFileSerialisationMethodOptException) {
EXPECT_THROW(parseFileSerialisationMethod("narwhal"), UsageError);
}
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* parseFileIngestionMethod, renderFileIngestionMethod * parseFileIngestionMethod, renderFileIngestionMethod
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
@ -12,6 +38,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_1) {
for (const FileIngestionMethod fim : { for (const FileIngestionMethod fim : {
FileIngestionMethod::Flat, FileIngestionMethod::Flat,
FileIngestionMethod::Recursive, FileIngestionMethod::Recursive,
FileIngestionMethod::Git,
}) { }) {
EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim); EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim);
} }
@ -21,6 +48,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_2) {
for (const std::string_view fimS : { for (const std::string_view fimS : {
"flat", "flat",
"nar", "nar",
"git",
}) { }) {
EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS); EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS);
} }