Add Git object hashing to the store layer

Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
This commit is contained in:
John Ericson 2023-09-04 09:51:23 -04:00
parent 04836c73e5
commit 201551c937
27 changed files with 484 additions and 86 deletions

View file

@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \
tests/functional/local.mk \
tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk

View file

@ -89,15 +89,20 @@ where
- `rec` = one of:
- ```ebnf
| ""
```
(empty string) for hashes of the flat (single file) serialization
- ```ebnf
| "r:"
```
for hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization
- ```ebnf
| ""
| "git:"
```
(empty string) for hashes of the flat (single file) serialization
for hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format
- ```ebnf
algo = "md5" | "sha1" | "sha256"

View file

@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath(
accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) {

View file

@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") {
else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {};
} else

View file

@ -324,6 +324,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
StringSink s;
@ -331,6 +332,10 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s);
break;
}
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else {
// Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes.
@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter);

View file

@ -147,7 +147,7 @@ public:
void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

View file

@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
auto got = ({
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto got = [&]{
PosixSourceAccessor accessor;
dumpPath(
accessor, CanonPath { actualPath },
caSink,
outputHash.method.getFileIngestionMethod());
caSink.finish().first;
});
auto fim = outputHash.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();
ValidPathInfo newInfo0 {
worker.store,
@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second;
}
@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs();

View file

@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path)))
res = false;
else {
HashResult current = hashPath(
Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first;
res = info->narHash == nullHash || info->narHash == current;
}
pathContentsGoodCache.insert_or_assign(path, res);
if (!res)

View file

@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive;
}
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {};
}
@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_();
return {
std::move(method),

View file

@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"
namespace nix::daemon {
@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
});
logger->startWork();

View file

@ -43,7 +43,7 @@ public:
LocalFSStore(const Params & params);
void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
/**
* Creates symlink from the `gcRoot` to the `storePath` and

View file

@ -1,5 +1,6 @@
#include "local-store.hh"
#include "globals.hh"
#include "git.hh"
#include "archive.hh"
#include "pathlocks.hh"
#include "worker-protocol.hh"
@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
if (info.ca) {
auto & specified = *info.ca;
auto actualHash = ({
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
PosixSourceAccessor accessor;
dumpPath(
*getFSAccessor(false),
CanonPath { printStorePath(info.path) },
caSink,
specified.method.getFileIngestionMethod());
auto accessor = getFSAccessor(false);
CanonPath path { printStorePath(info.path) };
Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
auto fim = specified.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim);
h = caSink.finish().first;
break;
}
case FileIngestionMethod::Git:
h = git::dumpHash(specified.hash.algo, *accessor, path).hash;
break;
}
ContentAddress {
.method = specified.method,
.hash = caSink.finish().first,
.hash = std::move(h),
};
});
if (specified.hash != actualHash.hash) {
@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";
restorePath(tempPath, bothSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
dumpBuffer.reset();
dump = {};
@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump(
if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the buffer in memory. */
restorePath(realPath, dumpSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
} else {
/* Move the temporary path we restored above. */
moveFile(tempPath, realPath);
@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor;
std::string hash = hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false);
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false);
if (hash != link.name) {
printError("link '%s' was modified! expected hash '%s', got '%s'",
linkPath, link.name, hash);

View file

@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
});
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));
@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
})))
{
// XXX: Consider overwriting linkPath with our valid version.

View file

@ -13,6 +13,7 @@
#include "derivations.hh"
#include "pool.hh"
#include "finally.hh"
#include "git.hh"
#include "logging.hh"
#include "callback.hh"
#include "filetransfer.hh"

View file

@ -184,7 +184,7 @@ protected:
friend struct ConnectionHandle;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
virtual void narFromPath(const StorePath & path, Sink & sink) override;

View file

@ -12,7 +12,9 @@
#include "references.hh"
#include "archive.hh"
#include "callback.hh"
#include "git.hh"
#include "remote-store.hh"
#include "posix-source-accessor.hh"
// FIXME this should not be here, see TODO below on
// `addMultipleToStore`.
#include "worker-protocol.hh"
@ -119,6 +121,9 @@ static std::string makeType(
StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{
if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
throw Error("Git file ingestion must use SHA-1 hash");
if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
} else {
@ -166,7 +171,7 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath(
const StorePathSet & references,
PathFilter & filter) const
{
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
return {
makeFixedOutputPathFromCA(
name,
@ -193,7 +198,37 @@ StorePath Store::addToStore(
RepairFlag repair)
{
auto source = sinkToSource([&](Sink & sink) {
dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter);
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter);
break;
}
case FileIngestionMethod::Git:
{
git::dump(
accessor, path,
sink,
// recursively add to store if path is a directory
[&](const CanonPath & path) -> git::TreeEntry {
auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair);
auto info = queryPathInfo(storePath);
assert(info->ca);
assert(info->ca->method == FileIngestionMethod::Git);
auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath)));
auto gitModeOpt = git::convertMode(stat.type);
assert(gitModeOpt);
return {
.mode = *gitModeOpt,
.hash = info->ca->hash,
};
},
filter);
break;
}
}
});
return addToStoreFromDump(*source, name, method, hashAlgo, references, repair);
}
@ -355,9 +390,7 @@ ValidPathInfo Store::addToStoreSlow(
NullFileSystemObjectSink blank;
auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat
? (FileSystemObjectSink &) fileSink
: method.getFileIngestionMethod() == FileIngestionMethod::Recursive
? (FileSystemObjectSink &) blank
: (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases
: (FileSystemObjectSink &) blank; // for recursive or git we do recursive
/* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */
@ -369,6 +402,8 @@ ValidPathInfo Store::addToStoreSlow(
auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256
? narHash
: method == FileIngestionMethod::Git
? git::dumpHash(hashAlgo, accessor, srcPath).hash
: caHashSink.finish().first;
if (expectedCAHash && expectedCAHash != hash)

View file

@ -35,7 +35,7 @@ public:
static std::set<std::string> uriSchemes()
{ return {"unix"}; }
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override
{ return LocalFSStore::getFSAccessor(requireValidPath); }
void narFromPath(const StorePath & path, Sink & sink) override

View file

@ -1,16 +1,53 @@
#include "file-content-address.hh"
#include "archive.hh"
#include "git.hh"
namespace nix {
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
static std::optional<FileSerialisationMethod> parseFileSerialisationMethodOpt(std::string_view input)
{
if (input == "flat") {
return FileIngestionMethod::Flat;
return FileSerialisationMethod::Flat;
} else if (input == "nar") {
return FileIngestionMethod::Recursive;
return FileSerialisationMethod::Recursive;
} else {
throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`");
return std::nullopt;
}
}
/**
 * Parse a `FileSerialisationMethod` from its textual name.
 *
 * Accepts exactly the names recognised by
 * `parseFileSerialisationMethodOpt` (`flat` and `nar`).
 *
 * @param input The name to parse.
 * @return The corresponding `FileSerialisationMethod`.
 * @throws UsageError if `input` is not a known serialisation method;
 * the message includes the rejected input.
 */
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input)
{
    if (auto ret = parseFileSerialisationMethodOpt(input))
        return *ret;

    // Pass `input` so the `%s` placeholder is actually filled in.
    throw UsageError("Unknown file serialisation method '%s', expect `flat` or `nar`", input);
}
/**
 * Parse a `FileIngestionMethod` from its textual name.
 *
 * `git` maps to `FileIngestionMethod::Git`; every other accepted name
 * is one recognised by `parseFileSerialisationMethodOpt` (`flat`,
 * `nar`) and is converted to the ingestion method of the same name.
 *
 * @param input The name to parse.
 * @return The corresponding `FileIngestionMethod`.
 * @throws UsageError if `input` is not a known ingestion method; the
 * message includes the rejected input.
 */
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
{
    if (input == "git")
        return FileIngestionMethod::Git;

    // The serialisation-based methods share their names (and enumerator
    // values) with the ingestion methods, hence the plain cast.
    if (auto ret = parseFileSerialisationMethodOpt(input))
        return static_cast<FileIngestionMethod>(*ret);

    // Pass `input` so the `%s` placeholder is actually filled in.
    throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`", input);
}
/**
 * Render a `FileSerialisationMethod` as its textual name.
 *
 * @param method The method to render.
 * @return `"flat"` for `Flat`, `"nar"` for `Recursive`.
 *
 * Aborts on any other (invalid) enumerator value.
 */
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method)
{
    switch (method) {
    case FileSerialisationMethod::Flat:
        return "flat";
    case FileSerialisationMethod::Recursive:
        return "nar";
    default:
        // Use `abort()` rather than `assert(false)`: an assertion
        // compiles away under NDEBUG, which would let control run off
        // the end of this non-void function. This also matches
        // `renderFileIngestionMethod`.
        abort();
    }
}
@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
{
switch (method) {
case FileIngestionMethod::Flat:
return "flat";
case FileIngestionMethod::Recursive:
return "nar";
return renderFileSerialisationMethod(
static_cast<FileSerialisationMethod>(method));
case FileIngestionMethod::Git:
return "git";
default:
abort();
}
@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
void dumpPath(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
FileIngestionMethod method,
FileSerialisationMethod method,
PathFilter & filter)
{
switch (method) {
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
accessor.readFile(path, sink);
break;
case FileIngestionMethod::Recursive:
case FileSerialisationMethod::Recursive:
accessor.dumpPath(path, sink, filter);
break;
}
@ -48,13 +87,13 @@ void dumpPath(
void restorePath(
const Path & path,
Source & source,
FileIngestionMethod method)
FileSerialisationMethod method)
{
switch (method) {
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
writeFile(path, source);
break;
case FileIngestionMethod::Recursive:
case FileSerialisationMethod::Recursive:
restorePath(path, source);
break;
}
@ -63,7 +102,7 @@ void restorePath(
HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter)
{
HashSink sink { ha };
@ -71,4 +110,20 @@ HashResult hashPath(
return sink.finish();
}
/**
 * Hash a file system object according to an arbitrary
 * `FileIngestionMethod`.
 *
 * - `Flat` / `Recursive`: delegates to the `FileSerialisationMethod`
 *   overload of `hashPath` and returns only the hash part of its
 *   result.
 * - `Git`: returns the hash computed by `git::dumpHash`.
 *
 * @param accessor Accessor used to read the file system object.
 * @param path Location of the object within `accessor`.
 * @param method How to ingest the object.
 * @param ht Hash algorithm to use.
 * @param filter Forwarded unchanged to the underlying hashing routine.
 * @return The resulting hash.
 */
Hash hashPath(
    SourceAccessor & accessor, const CanonPath & path,
    FileIngestionMethod method, HashAlgorithm ht,
    PathFilter & filter)
{
    switch (method) {
    case FileIngestionMethod::Flat:
    case FileIngestionMethod::Recursive:
        return hashPath(
            accessor, path,
            static_cast<FileSerialisationMethod>(method),
            ht, filter).first;
    case FileIngestionMethod::Git:
        return git::dumpHash(ht, accessor, path, filter).hash;
    }

    // Only reachable with an invalid enumerator value. Kept outside the
    // switch (instead of a `default:`) so the compiler still warns when
    // a new enumerator is not handled above.
    abort();
}
}

View file

@ -8,37 +8,38 @@
namespace nix {
/**
* An enumeration of the main ways we can serialize file system
* An enumeration of the ways we can serialize file system
* objects.
*/
enum struct FileIngestionMethod : uint8_t {
enum struct FileSerialisationMethod : uint8_t {
/**
* Flat-file hashing. Directly ingest the contents of a single file
* Flat-file. The contents of a single file exactly.
*/
Flat = 0,
Flat,
/**
* Recursive (or NAR) hashing. Serializes the file-system object in
* Nix Archive format and ingest that.
* Nix Archive. Serializes the file-system object in
* Nix Archive format.
*/
Recursive = 1,
Recursive,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
* Parse a `FileSerialisationMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `flat`: `FileSerialisationMethod::Flat`
* - `nar`: `FileSerialisationMethod::Recursive`
*
* Oppostite of `renderFileIngestionMethod`.
* Opposite of `renderFileSerialisationMethod`.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
* Render a `FileSerialisationMethod` by name.
*
* Oppostite of `parseFileIngestionMethod`.
* Opposite of `parseFileSerialisationMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method);
/**
* Dump a serialization of the given file system object.
@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method);
void dumpPath(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
FileIngestionMethod method,
FileSerialisationMethod method,
PathFilter & filter = defaultPathFilter);
/**
* Restore a serialization of the given file system object.
* Restore a serialisation of the given file system object.
*
* @TODO use an arbitrary `FileSystemObjectSink`.
*/
void restorePath(
const Path & path,
Source & source,
FileIngestionMethod method);
FileSerialisationMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* The hash is defined as (essentially) hashString(ha, dumpPath(path)).
* the hash is defined as (in pseudocode):
*
* ```
* hashString(ha, dumpPath(...))
* ```
*/
HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);
/**
* An enumeration of the ways we can ingest file system
* objects, producing a hash or digest.
*
* `Flat` and `Recursive` are declared in the same order as the
* enumerators of the same name in `FileSerialisationMethod`; code
* converts between the two enums with plain casts, so keep them
* aligned.
*/
enum struct FileIngestionMethod : uint8_t {
/**
* Hash `FileSerialisationMethod::Flat` serialisation.
*/
Flat,
/**
* Hash `FileSerialisationMethod::Recursive` serialisation.
*/
Recursive,
/**
* Git hashing. In particular files are hashed as git "blobs", and
* directories are hashed as git "trees".
*
* Unlike `Flat` and `Recursive`, this is not a hash of a single
* serialisation but a [Merkle
* DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple
* rounds of serialisation and hashing.
*
* @note Git's data model is slightly different, in that a plain
* file doesn't have an executable bit, directory entries do
* instead. We decide to treat a bare file as non-executable by fiat,
* as we do with `FileIngestionMethod::Flat` which also lacks this
* information. Thus, Git can encode some but not all of Nix's "File
* System Objects", and this sort of hashing is likewise partial.
*/
Git,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `git`: `FileIngestionMethod::Git`
*
* Opposite of `renderFileIngestionMethod`.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
*
* Opposite of `parseFileIngestionMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* Unlike the other `hashPath`, this works on an arbitrary
* `FileIngestionMethod` instead of `FileSerialisationMethod`, but
* doesn't return the size, as a size is not both simply and usefully
* defined for a Merkle format.
*/
Hash hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);

View file

@ -555,7 +555,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise)
HashResult hash = hashPath(
*store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
info->narHash = hash.first;
info->narSize = hash.second;
}

View file

@ -2,6 +2,7 @@
#include "common-args.hh"
#include "store-api.hh"
#include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh"
#include "misc-store-flags.hh"

View file

@ -5,6 +5,7 @@
#include "shared.hh"
#include "references.hh"
#include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh"
#include "misc-store-flags.hh"
@ -66,9 +67,11 @@ struct CmdHashBase : Command
{
switch (mode) {
case FileIngestionMethod::Flat:
return "print cryptographic hash of a regular file";
return "print cryptographic hash of a regular file";
case FileIngestionMethod::Recursive:
return "print cryptographic hash of the NAR serialisation of a path";
case FileIngestionMethod::Git:
return "print cryptographic hash of the Git serialisation of a path";
default:
assert(false);
};
@ -77,17 +80,41 @@ struct CmdHashBase : Command
void run() override
{
for (auto path : paths) {
auto makeSink = [&]() -> std::unique_ptr<AbstractHashSink> {
if (modulus)
return std::make_unique<HashModuloSink>(hashAlgo, *modulus);
else
return std::make_unique<HashSink>(hashAlgo);
};
std::unique_ptr<AbstractHashSink> hashSink;
if (modulus)
hashSink = std::make_unique<HashModuloSink>(hashAlgo, *modulus);
else
hashSink = std::make_unique<HashSink>(hashAlgo);
auto [accessor_, canonPath] = PosixSourceAccessor::createAtRoot(path);
auto & accessor = accessor_;
Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
switch (mode) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
auto hashSink = makeSink();
dumpPath(accessor, canonPath, *hashSink, (FileSerialisationMethod) mode);
h = hashSink->finish().first;
break;
}
case FileIngestionMethod::Git: {
std::function<git::DumpHook> hook;
hook = [&](const CanonPath & path) -> git::TreeEntry {
auto hashSink = makeSink();
auto mode = dump(accessor, path, *hashSink, hook);
auto hash = hashSink->finish().first;
return {
.mode = mode,
.hash = hash,
};
};
h = hook(canonPath).hash;
break;
}
}
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
dumpPath(accessor, canonPath, *hashSink, mode);
Hash h = hashSink->finish().first;
if (truncate && h.hashSize > 20) h = compressHash(h, 20);
logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI));
}

View file

@ -0,0 +1,11 @@
# Shared setup for the git-hashing functional tests: pull in the
# general test helpers, then start from a clean slate.
source ../common.sh
clearStore
clearCache
# Need backend to support git-hashing too
requireDaemonNewerThan "2.18.0pre20230908"
# Turn on the experimental feature, then restart the daemon
# (presumably so that it picks the setting up as well).
enableFeatures "git-hashing"
restartDaemon

View file

@ -0,0 +1,7 @@
# Functional tests for the experimental `git-hashing` feature.
git-hashing-tests := \
$(d)/simple.sh
install-tests-groups += git-hashing
clean-files += \
$(d)/config.nix

View file

@ -0,0 +1,58 @@
source common.sh
# Scratch Git repository, used by `try` below to cross-check the hashes
# computed by Nix against the object ids computed by Git itself.
repo="$TEST_ROOT/scratch"
git init "$repo"
# `git commit` needs an identity; any placeholder will do.
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
# Check that Nix and Git agree on the Git hash of "$TEST_ROOT/hash-path".
#
# $1: expected base16 SHA-1 of the blob (file) or tree (directory).
#
# First hashes the path with `nix hash path --mode git`, then commits a
# copy of it into the scratch repository "$repo" and asks Git for the
# object id of that path in HEAD. Both must equal $1.
try () {
    local expected="$1"
    local hash hash2

    hash=$(nix hash path --mode git --format base16 --algo sha1 "$TEST_ROOT/hash-path")
    [[ "$hash" == "$expected" ]]

    # Drop whatever an earlier call committed; there is nothing to
    # remove on the first call, hence `|| true`.
    git -C "$repo" rm -rf hash-path || true
    cp -r "$TEST_ROOT/hash-path" "$repo/hash-path"
    git -C "$repo" add hash-path
    git -C "$repo" commit -m "x"
    git -C "$repo" status
    hash2=$(git -C "$repo" rev-parse HEAD:hash-path)
    [[ "$hash2" == "$expected" ]]
}
# blob
rm -rf "$TEST_ROOT/hash-path"
echo "Hello World" > "$TEST_ROOT/hash-path"
try "557db03de997c86a4a028e1ebd3a1ceb225be238"

# tree with children
rm -rf "$TEST_ROOT/hash-path"
mkdir "$TEST_ROOT/hash-path"
echo "Hello World" > "$TEST_ROOT/hash-path/hello"
echo "Run Hello World" > "$TEST_ROOT/hash-path/executable"
chmod +x "$TEST_ROOT/hash-path/executable"
try "e5c0a11a556801a5c9dcf330ca9d7e2c572697f4"

# Add a single file to the store with Git ingestion and compare the hash
# reported by `nix-store -q --hash` with a known value.
rm -rf "$TEST_ROOT/dummy1"
echo Hello World! > "$TEST_ROOT/dummy1"
path1=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy1")
hash1=$(nix-store -q --hash "$path1")
test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v"

# Same, for a directory containing one file.
rm -rf "$TEST_ROOT/dummy2"
mkdir -p "$TEST_ROOT/dummy2"
echo Hello World! > "$TEST_ROOT/dummy2/hello"
path2=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy2")
hash2=$(nix-store -q --hash "$path2")
test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0"

# Same, for a nested directory with a regular and an executable file.
rm -rf "$TEST_ROOT/dummy3"
mkdir -p "$TEST_ROOT/dummy3"
mkdir -p "$TEST_ROOT/dummy3/dir"
touch "$TEST_ROOT/dummy3/dir/file"
echo Hello World! > "$TEST_ROOT/dummy3/dir/file"
touch "$TEST_ROOT/dummy3/dir/executable"
chmod +x "$TEST_ROOT/dummy3/dir/executable"
echo Run Hello World! > "$TEST_ROOT/dummy3/dir/executable"
path3=$(nix store add --mode git --hash-algo sha1 "$TEST_ROOT/dummy3")
hash3=$(nix-store -q --hash "$path3")
test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv"

View file

@ -13,6 +13,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_1) {
ContentAddressMethod { TextIngestionMethod {} },
ContentAddressMethod { FileIngestionMethod::Flat },
ContentAddressMethod { FileIngestionMethod::Recursive },
ContentAddressMethod { FileIngestionMethod::Git },
}) {
EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam);
}
@ -23,6 +24,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_2) {
"text",
"flat",
"nar",
"git",
}) {
EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS);
}

View file

@ -4,6 +4,32 @@
namespace nix {
/* ----------------------------------------------------------------------------
* parseFileSerialisationMethod, renderFileSerialisationMethod
* --------------------------------------------------------------------------*/
TEST(FileSerialisationMethod, testRoundTripPrintParse_1) {
    // Rendering a method and parsing the result must give back the
    // method we started with.
    const auto methods = {
        FileSerialisationMethod::Flat,
        FileSerialisationMethod::Recursive,
    };
    for (const auto method : methods) {
        const auto rendered = renderFileSerialisationMethod(method);
        EXPECT_EQ(parseFileSerialisationMethod(rendered), method);
    }
}
TEST(FileSerialisationMethod, testRoundTripPrintParse_2) {
    // Parsing a name and rendering the result must give back the name
    // we started with.
    constexpr std::string_view names[] = {
        "flat",
        "nar",
    };
    for (const auto name : names) {
        const auto parsed = parseFileSerialisationMethod(name);
        EXPECT_EQ(renderFileSerialisationMethod(parsed), name);
    }
}
// A name that merely starts with a valid method name ("nar") must still
// be rejected with a UsageError.
TEST(FileSerialisationMethod, testParseFileSerialisationMethodOptException) {
EXPECT_THROW(parseFileSerialisationMethod("narwhal"), UsageError);
}
/* ----------------------------------------------------------------------------
* parseFileIngestionMethod, renderFileIngestionMethod
* --------------------------------------------------------------------------*/
@ -12,6 +38,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_1) {
for (const FileIngestionMethod fim : {
FileIngestionMethod::Flat,
FileIngestionMethod::Recursive,
FileIngestionMethod::Git,
}) {
EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim);
}
@ -21,6 +48,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_2) {
for (const std::string_view fimS : {
"flat",
"nar",
"git",
}) {
EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS);
}