Merge pull request #8918 from obsidiansystems/git-objects

Git object hashing in libstore
John Ericson 2024-02-27 19:02:22 -05:00 committed by GitHub
commit f489a6e42d
35 changed files with 544 additions and 149 deletions


@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \ makefiles += \
tests/functional/local.mk \ tests/functional/local.mk \
tests/functional/ca/local.mk \ tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \ tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \ tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk tests/functional/plugins/local.mk


@ -89,15 +89,20 @@ where
- `rec` = one of: - `rec` = one of:
- ```ebnf
| ""
```
(empty string) for hashes of the flat (single file) serialization
- ```ebnf - ```ebnf
| "r:" | "r:"
``` ```
for hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization for hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization
- ```ebnf - ```ebnf
| "" | "git:"
``` ```
(empty string) for hashes of the flat (single file) serialization for hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format
- ```ebnf - ```ebnf
algo = "md5" | "sha1" | "sha256" algo = "md5" | "sha1" | "sha256"


@ -209,6 +209,10 @@ in {
(lib.getBin lowdown) (lib.getBin lowdown)
mdbook mdbook
mdbook-linkcheck mdbook-linkcheck
] ++ lib.optionals doInstallCheck [
git
mercurial
openssh
] ++ lib.optionals (doInstallCheck || enableManual) [ ] ++ lib.optionals (doInstallCheck || enableManual) [
jq # Also for custom mdBook preprocessor. jq # Also for custom mdBook preprocessor.
] ++ lib.optional stdenv.hostPlatform.isLinux util-linux ] ++ lib.optional stdenv.hostPlatform.isLinux util-linux
@ -249,12 +253,6 @@ in {
dontBuild = !attrs.doBuild; dontBuild = !attrs.doBuild;
doCheck = attrs.doCheck; doCheck = attrs.doCheck;
nativeCheckInputs = [
git
mercurial
openssh
];
disallowedReferences = [ boost ]; disallowedReferences = [ boost ];
preConfigure = lib.optionalString (doBuild && ! stdenv.hostPlatform.isStatic) ( preConfigure = lib.optionalString (doBuild && ! stdenv.hostPlatform.isStatic) (


@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath( Hash h = hashPath(
accessor, canonPath, accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) { } catch (Error & e) {


@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) { auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive; if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat; else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") { else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations); experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {}; ingestionMethod = TextIngestionMethod {};
} else } else
@ -2089,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
}) })
: ({ : ({
StringSource s { contents }; StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
}); });
/* Note: we don't need to add `context' to the context of the /* Note: we don't need to add `context' to the context of the


@ -305,7 +305,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
StorePath BinaryCacheStore::addToStoreFromDump( StorePath BinaryCacheStore::addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) RepairFlag repair)
@ -313,17 +314,27 @@ StorePath BinaryCacheStore::addToStoreFromDump(
std::optional<Hash> caHash; std::optional<Hash> caHash;
std::string nar; std::string nar;
// Calculating Git hash from NAR stream not yet implemented. May not
// be possible to implement in single-pass if the NAR is in an
// inconvenient order. Could fetch after uploading, however.
if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git)
unsupported("addToStoreFromDump");
if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) { if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) {
auto & dump2 = *dump2p; auto & dump2 = *dump2p;
// Hack, this gives us a "replayable" source so we can compute // Hack, this gives us a "replayable" source so we can compute
// multiple hashes more easily. // multiple hashes more easily.
caHash = hashString(HashAlgorithm::SHA256, dump2.s); //
switch (method.getFileIngestionMethod()) { // Only calculate if the dump is in the right format, however.
case FileIngestionMethod::Recursive: if (static_cast<FileIngestionMethod>(dumpMethod) == hashMethod.getFileIngestionMethod())
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (dumpMethod) {
case FileSerialisationMethod::Recursive:
// The dump is already NAR in this case, just use it. // The dump is already NAR in this case, just use it.
nar = dump2.s; nar = dump2.s;
break; break;
case FileIngestionMethod::Flat: case FileSerialisationMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a // The dump is Flat, so we need to convert it to NAR with a
// single file. // single file.
StringSink s; StringSink s;
@ -331,10 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s); nar = std::move(s.s);
break; break;
} }
}
} else { } else {
// Otherwise, we have to do the same hashing as NAR so our single // Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes. // hash will suffice for both purposes.
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
unsupported("addToStoreFromDump"); unsupported("addToStoreFromDump");
} }
StringSource narDump { nar }; StringSource narDump { nar };
@ -349,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
*this, *this,
name, name,
ContentAddressWithReferences::fromParts( ContentAddressWithReferences::fromParts(
method, hashMethod,
caHash ? *caHash : nar.first, caHash ? *caHash : nar.first,
{ {
.others = references, .others = references,
@ -450,7 +462,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */ implementation of this method in terms of addToStoreFromDump. */
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter); accessor.dumpPath(path, sink, filter);


@ -125,7 +125,8 @@ public:
StorePath addToStoreFromDump( StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) override; RepairFlag repair) override;
@ -147,7 +148,7 @@ public:
void narFromPath(const StorePath & path, Sink & sink) override; void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
void addSignatures(const StorePath & storePath, const StringSet & sigs) override; void addSignatures(const StorePath & storePath, const StringSet & sigs) override;


@ -8,6 +8,7 @@
#include "finally.hh" #include "finally.hh"
#include "util.hh" #include "util.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "compression.hh" #include "compression.hh"
#include "daemon.hh" #include "daemon.hh"
#include "topo-sort.hh" #include "topo-sort.hh"
@ -1311,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In
StorePath addToStoreFromDump( StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) override RepairFlag repair) override
{ {
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair); auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair);
goal.addDependency(path); goal.addDependency(path);
return path; return path;
} }
@ -2457,15 +2459,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites); rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */ /* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() }; std::string oldHashPart { scratchPath->hashPart() };
auto got = ({ auto got = [&]{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
dumpPath( auto fim = outputHash.method.getFileIngestionMethod();
accessor, CanonPath { actualPath }, switch (fim) {
caSink, case FileIngestionMethod::Flat:
outputHash.method.getFileIngestionMethod()); case FileIngestionMethod::Recursive:
caSink.finish().first; {
}); HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();
ValidPathInfo newInfo0 { ValidPathInfo newInfo0 {
worker.store, worker.store,
@ -2491,7 +2506,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath( HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath }, accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first; newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second; newInfo0.narSize = narHashAndSize.second;
} }
@ -2515,7 +2530,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath( HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath }, accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first }; ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second; newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs(); auto refs = rewriteRefs();


@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path))) if (!pathExists(store.printStorePath(path)))
res = false; res = false;
else { else {
HashResult current = hashPath( Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) }, *store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo); FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256); Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first; res = info->narHash == nullHash || info->narHash == current;
} }
pathContentsGoodCache.insert_or_assign(path, res); pathContentsGoodCache.insert_or_assign(path, res);
if (!res) if (!res)


@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return ""; return "";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "r:"; return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default: default:
throw Error("impossible, caught both cases"); throw Error("impossible, caught both cases");
} }
@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) { if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive; return FileIngestionMethod::Recursive;
} }
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) { else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {}; return TextIngestionMethod {};
} }
@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat; auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:")) if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive; method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_(); HashAlgorithm hashAlgo = parseHashAlgorithm_();
return { return {
std::move(method), std::move(method),
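
The new `git`/`git:` spellings round-trip through the same parse/render helpers as `flat` and `nar` (the unit tests at the end of this diff exercise exactly that). A minimal sketch, assuming the `content-address.hh` API shown here and that the `git-hashing` experimental feature is enabled:

```cpp
// Sketch only: round-trip the new "git" content-address method.
// Assumes the git-hashing experimental feature is enabled, since the
// git spellings are gated behind it in this change.
#include "content-address.hh"
#include <cassert>

using namespace nix;

void roundTripGitMethod()
{
    auto cam = ContentAddressMethod::parse("git");
    assert(cam == ContentAddressMethod { FileIngestionMethod::Git });
    assert(cam.render() == "git");
}
```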


@ -13,6 +13,7 @@
#include "archive.hh" #include "archive.hh"
#include "derivations.hh" #include "derivations.hh"
#include "args.hh" #include "args.hh"
#include "git.hh"
namespace nix::daemon { namespace nix::daemon {
@ -400,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork(); logger->startWork();
auto pathInfo = [&]() { auto pathInfo = [&]() {
// NB: FramedSource must be out of scope before logger->stopWork(); // NB: FramedSource must be out of scope before logger->stopWork();
auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr); auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr);
auto hashAlgo = hashAlgo_; // work around clang bug
FramedSource source(from); FramedSource source(from);
FileSerialisationMethod dumpMethod;
switch (contentAddressMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
dumpMethod = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
dumpMethod = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
dumpMethod = FileSerialisationMethod::Recursive;
break;
}
// TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store. // TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store.
auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair); auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair);
return store->queryPathInfo(path); return store->queryPathInfo(path);
}(); }();
logger->stopWork(); logger->stopWork();
@ -430,30 +443,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
hashAlgo = parseHashAlgo(hashAlgoRaw); hashAlgo = parseHashAlgo(hashAlgoRaw);
} }
// Old protocol always sends NAR, regardless of hashing method
auto dumpSource = sinkToSource([&](Sink & saved) { auto dumpSource = sinkToSource([&](Sink & saved) {
if (method == FileIngestionMethod::Recursive) { /* We parse the NAR dump through into `saved` unmodified,
/* We parse the NAR dump through into `saved` unmodified, so why all this extra work? We still parse the NAR so
so why all this extra work? We still parse the NAR so that we aren't sending arbitrary data to `saved`
that we aren't sending arbitrary data to `saved` unwittingly`, and we know when the NAR ends so we don't
unwittingly`, and we know when the NAR ends so we don't consume the rest of `from` and can't parse another
consume the rest of `from` and can't parse another command. (We don't trust `addToStoreFromDump` to not
command. (We don't trust `addToStoreFromDump` to not eagerly consume the entire stream it's given, past the
eagerly consume the entire stream it's given, past the length of the Nar. */
length of the Nar. */ TeeSource savedNARSource(from, saved);
TeeSource savedNARSource(from, saved); NullFileSystemObjectSink sink; /* just parse the NAR */
NullFileSystemObjectSink sink; /* just parse the NAR */ parseDump(sink, savedNARSource);
parseDump(sink, savedNARSource);
} else {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
}
}); });
logger->startWork(); logger->startWork();
auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo); auto path = store->addToStoreFromDump(
*dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo);
logger->stopWork(); logger->stopWork();
to << store->printStorePath(path); to << store->printStorePath(path);
@ -485,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork(); logger->startWork();
auto path = ({ auto path = ({
StringSource source { s }; StringSource source { s };
store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
}); });
logger->stopWork(); logger->stopWork();
to << store->printStorePath(path); to << store->printStorePath(path);


@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store,
}) })
: ({ : ({
StringSource s { contents }; StringSource s { contents };
store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
}); });
} }


@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store
virtual StorePath addToStoreFromDump( virtual StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive, FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256, HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(), const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override RepairFlag repair = NoRepair) override


@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor
virtual StorePath addToStoreFromDump( virtual StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive, FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256, HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(), const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override RepairFlag repair = NoRepair) override


@ -43,7 +43,7 @@ public:
LocalFSStore(const Params & params); LocalFSStore(const Params & params);
void narFromPath(const StorePath & path, Sink & sink) override; void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
/** /**
* Creates symlink from the `gcRoot` to the `storePath` and * Creates symlink from the `gcRoot` to the `storePath` and


@ -1,5 +1,6 @@
#include "local-store.hh" #include "local-store.hh"
#include "globals.hh" #include "globals.hh"
#include "git.hh"
#include "archive.hh" #include "archive.hh"
#include "pathlocks.hh" #include "pathlocks.hh"
#include "worker-protocol.hh" #include "worker-protocol.hh"
@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
if (info.ca) { if (info.ca) {
auto & specified = *info.ca; auto & specified = *info.ca;
auto actualHash = ({ auto actualHash = ({
HashModuloSink caSink { auto accessor = getFSAccessor(false);
specified.hash.algo, CanonPath path { printStorePath(info.path) };
std::string { info.path.hashPart() }, Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
}; auto fim = specified.method.getFileIngestionMethod();
PosixSourceAccessor accessor; switch (fim) {
dumpPath( case FileIngestionMethod::Flat:
*getFSAccessor(false), case FileIngestionMethod::Recursive:
CanonPath { printStorePath(info.path) }, {
caSink, HashModuloSink caSink {
specified.method.getFileIngestionMethod()); specified.hash.algo,
std::string { info.path.hashPart() },
};
dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim);
h = caSink.finish().first;
break;
}
case FileIngestionMethod::Git:
h = git::dumpHash(specified.hash.algo, *accessor, path).hash;
break;
}
ContentAddress { ContentAddress {
.method = specified.method, .method = specified.method,
.hash = caSink.finish().first, .hash = std::move(h),
}; };
}); });
if (specified.hash != actualHash.hash) { if (specified.hash != actualHash.hash) {
@ -1137,7 +1148,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
StorePath LocalStore::addToStoreFromDump( StorePath LocalStore::addToStoreFromDump(
Source & source0, Source & source0,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) RepairFlag repair)
@ -1190,7 +1202,13 @@ StorePath LocalStore::addToStoreFromDump(
Path tempDir; Path tempDir;
AutoCloseFD tempDirFd; AutoCloseFD tempDirFd;
if (!inMemory) { bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod;
/* If the methods don't match, our streaming hash of the dump is the
wrong sort, and we need to rehash. */
bool inMemoryAndDontNeedRestore = inMemory && methodsMatch;
if (!inMemoryAndDontNeedRestore) {
/* Drain what we pulled so far, and then keep on pulling */ /* Drain what we pulled so far, and then keep on pulling */
StringSource dumpSource { dump }; StringSource dumpSource { dump };
ChainSource bothSource { dumpSource, source }; ChainSource bothSource { dumpSource, source };
@ -1199,17 +1217,23 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir); delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x"; tempPath = tempDir + "/x";
restorePath(tempPath, bothSource, method.getFileIngestionMethod()); restorePath(tempPath, bothSource, dumpMethod);
dumpBuffer.reset(); dumpBuffer.reset();
dump = {}; dump = {};
} }
auto [hash, size] = hashSink->finish(); auto [dumpHash, size] = hashSink->finish();
PosixSourceAccessor accessor;
auto desc = ContentAddressWithReferences::fromParts( auto desc = ContentAddressWithReferences::fromParts(
method, hashMethod,
hash, methodsMatch
? dumpHash
: hashPath(
accessor, CanonPath { tempPath },
hashMethod.getFileIngestionMethod(), hashAlgo),
{ {
.others = references, .others = references,
// caller is not capable of creating a self-reference, because this is content-addressed without modulus // caller is not capable of creating a self-reference, because this is content-addressed without modulus
@ -1235,10 +1259,20 @@ StorePath LocalStore::addToStoreFromDump(
autoGC(); autoGC();
if (inMemory) { if (inMemoryAndDontNeedRestore) {
StringSource dumpSource { dump }; StringSource dumpSource { dump };
/* Restore from the buffer in memory. */ /* Restore from the buffer in memory. */
restorePath(realPath, dumpSource, method.getFileIngestionMethod()); auto fim = hashMethod.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git:
// doesn't correspond to serialization method, so
// this should be unreachable
assert(false);
}
} else { } else {
/* Move the temporary path we restored above. */ /* Move the temporary path we restored above. */
moveFile(tempPath, realPath); moveFile(tempPath, realPath);
@ -1246,8 +1280,8 @@ StorePath LocalStore::addToStoreFromDump(
/* For computing the nar hash. In recursive SHA-256 mode, this /* For computing the nar hash. In recursive SHA-256 mode, this
is the same as the store hash, so no need to do it again. */ is the same as the store hash, so no need to do it again. */
auto narHash = std::pair { hash, size }; auto narHash = std::pair { dumpHash, size };
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) {
HashSink narSink { HashAlgorithm::SHA256 }; HashSink narSink { HashAlgorithm::SHA256 };
dumpPath(realPath, narSink); dumpPath(realPath, narSink);
narHash = narSink.finish(); narHash = narSink.finish();
@ -1367,7 +1401,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
std::string hash = hashPath( std::string hash = hashPath(
accessor, CanonPath { linkPath }, accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false);
if (hash != link.name) { if (hash != link.name) {
printError("link '%s' was modified! expected hash '%s', got '%s'", printError("link '%s' was modified! expected hash '%s', got '%s'",
linkPath, link.name, hash); linkPath, link.name, hash);


@ -180,7 +180,8 @@ public:
StorePath addToStoreFromDump( StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) override; RepairFlag repair) override;


@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
hashPath( hashPath(
accessor, CanonPath { path }, accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
}); });
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true)); debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));
@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
hashPath( hashPath(
accessor, CanonPath { linkPath }, accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
}))) })))
{ {
// XXX: Consider overwriting linkPath with our valid version. // XXX: Consider overwriting linkPath with our valid version.


@ -13,6 +13,7 @@
#include "derivations.hh" #include "derivations.hh"
#include "pool.hh" #include "pool.hh"
#include "finally.hh" #include "finally.hh"
#include "git.hh"
#include "logging.hh" #include "logging.hh"
#include "callback.hh" #include "callback.hh"
#include "filetransfer.hh" #include "filetransfer.hh"
@ -508,12 +509,28 @@ ref<const ValidPathInfo> RemoteStore::addCAToStore(
StorePath RemoteStore::addToStoreFromDump( StorePath RemoteStore::addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method, FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo, HashAlgorithm hashAlgo,
const StorePathSet & references, const StorePathSet & references,
RepairFlag repair) RepairFlag repair)
{ {
return addCAToStore(dump, name, method, hashAlgo, references, repair)->path; FileSerialisationMethod fsm;
switch (hashMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
fsm = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
fsm = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
fsm = FileSerialisationMethod::Recursive;
break;
}
if (fsm != dumpMethod)
unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination");
return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path;
} }

View file

@ -87,7 +87,8 @@ public:
StorePath addToStoreFromDump( StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive, FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256, HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(), const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override; RepairFlag repair = NoRepair) override;
@ -184,7 +185,7 @@ protected:
friend struct ConnectionHandle; friend struct ConnectionHandle;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath) override; virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
virtual void narFromPath(const StorePath & path, Sink & sink) override; virtual void narFromPath(const StorePath & path, Sink & sink) override;


@ -12,7 +12,9 @@
#include "references.hh" #include "references.hh"
#include "archive.hh" #include "archive.hh"
#include "callback.hh" #include "callback.hh"
#include "git.hh"
#include "remote-store.hh" #include "remote-store.hh"
#include "posix-source-accessor.hh"
// FIXME this should not be here, see TODO below on // FIXME this should not be here, see TODO below on
// `addMultipleToStore`. // `addMultipleToStore`.
#include "worker-protocol.hh" #include "worker-protocol.hh"
@ -119,6 +121,9 @@ static std::string makeType(
StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{ {
if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
throw Error("Git file ingestion must use SHA-1 hash");
if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) { if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name); return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
} else { } else {
@ -166,7 +171,7 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath(
const StorePathSet & references, const StorePathSet & references,
PathFilter & filter) const PathFilter & filter) const
{ {
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
return { return {
makeFixedOutputPathFromCA( makeFixedOutputPathFromCA(
name, name,
@ -192,10 +197,23 @@ StorePath Store::addToStore(
PathFilter & filter, PathFilter & filter,
RepairFlag repair) RepairFlag repair)
{ {
FileSerialisationMethod fsm;
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
fsm = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
fsm = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
fsm = FileSerialisationMethod::Recursive;
break;
}
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter); dumpPath(accessor, path, sink, fsm, filter);
}); });
return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair);
} }
void Store::addMultipleToStore( void Store::addMultipleToStore(
@ -355,9 +373,7 @@ ValidPathInfo Store::addToStoreSlow(
NullFileSystemObjectSink blank; NullFileSystemObjectSink blank;
auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat
? (FileSystemObjectSink &) fileSink ? (FileSystemObjectSink &) fileSink
: method.getFileIngestionMethod() == FileIngestionMethod::Recursive : (FileSystemObjectSink &) blank; // for recursive or git we do recursive
? (FileSystemObjectSink &) blank
: (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases
/* The information that flows from tapped (besides being replicated in /* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */ narSink), is now put in parseSink. */
@ -369,6 +385,8 @@ ValidPathInfo Store::addToStoreSlow(
auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256 auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256
? narHash ? narHash
: method == FileIngestionMethod::Git
? git::dumpHash(hashAlgo, accessor, srcPath).hash
: caHashSink.finish().first; : caHashSink.finish().first;
if (expectedCAHash && expectedCAHash != hash) if (expectedCAHash && expectedCAHash != hash)


@ -466,14 +466,23 @@ public:
* in `dump`, which is either a NAR serialisation (if recursive == * in `dump`, which is either a NAR serialisation (if recursive ==
* true) or simply the contents of a regular file (if recursive == * true) or simply the contents of a regular file (if recursive ==
* false). * false).
* `dump` may be drained
* *
* \todo remove? * `dump` may be drained.
*
* @param dumpMethod What serialisation format is `dump`, i.e. how
* to deserialize it. Must either match hashMethod or be
* `FileSerialisationMethod::Recursive`.
*
* @param hashMethod How to content-address the data. Need not be the
* same as `dumpMethod`.
*
* @todo remove?
*/ */
virtual StorePath addToStoreFromDump( virtual StorePath addToStoreFromDump(
Source & dump, Source & dump,
std::string_view name, std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive, FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256, HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(), const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) = 0; RepairFlag repair = NoRepair) = 0;
@ -772,7 +781,7 @@ protected:
* Helper for methods that are not unsupported: this is used for * Helper for methods that are not unsupported: this is used for
* default definitions for virtual methods that are meant to be overridden. * default definitions for virtual methods that are meant to be overridden.
* *
* \todo Using this should be a last resort. It is better to make * @todo Using this should be a last resort. It is better to make
* the method "virtual pure" and/or move it to a subclass. * the method "virtual pure" and/or move it to a subclass.
*/ */
[[noreturn]] void unsupported(const std::string & op) [[noreturn]] void unsupported(const std::string & op)
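
To make the `dumpMethod`/`hashMethod` split concrete, here is a minimal sketch (not part of this change; `store`, `dump`, and the name are assumed inputs) of a caller that streams a NAR dump but asks for a Git content address, a pairing the documentation above allows:

```cpp
// Sketch only: the dump is a NAR (FileSerialisationMethod::Recursive) while the
// store path is content-addressed with Git hashing. Per the doc comment above,
// dumpMethod must match hashMethod or be Recursive, so this pairing is legal;
// individual Store implementations may still report it as unsupported.
#include "store-api.hh"

using namespace nix;

StorePath addGitHashedNar(Store & store, Source & dump, std::string_view name)
{
    return store.addToStoreFromDump(
        dump, name,
        FileSerialisationMethod::Recursive,  // how `dump` is serialised
        FileIngestionMethod::Git,            // how the content address is computed
        HashAlgorithm::SHA1);                // Git object hashes use SHA-1 here
}
```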


@ -35,7 +35,7 @@ public:
static std::set<std::string> uriSchemes() static std::set<std::string> uriSchemes()
{ return {"unix"}; } { return {"unix"}; }
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override
{ return LocalFSStore::getFSAccessor(requireValidPath); } { return LocalFSStore::getFSAccessor(requireValidPath); }
void narFromPath(const StorePath & path, Sink & sink) override void narFromPath(const StorePath & path, Sink & sink) override


@ -1,16 +1,53 @@
#include "file-content-address.hh" #include "file-content-address.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
namespace nix { namespace nix {
FileIngestionMethod parseFileIngestionMethod(std::string_view input) static std::optional<FileSerialisationMethod> parseFileSerialisationMethodOpt(std::string_view input)
{ {
if (input == "flat") { if (input == "flat") {
return FileIngestionMethod::Flat; return FileSerialisationMethod::Flat;
} else if (input == "nar") { } else if (input == "nar") {
return FileIngestionMethod::Recursive; return FileSerialisationMethod::Recursive;
} else { } else {
throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`"); return std::nullopt;
}
}
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input)
{
auto ret = parseFileSerialisationMethodOpt(input);
if (ret)
return *ret;
else
throw UsageError("Unknown file serialisation method '%s', expect `flat` or `nar`", input);
}
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
{
if (input == "git") {
return FileIngestionMethod::Git;
} else {
auto ret = parseFileSerialisationMethodOpt(input);
if (ret)
return static_cast<FileIngestionMethod>(*ret);
else
throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`", input);
}
}
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method)
{
switch (method) {
case FileSerialisationMethod::Flat:
return "flat";
case FileSerialisationMethod::Recursive:
return "nar";
default:
assert(false);
} }
} }
@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
return "flat";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "nar"; return renderFileSerialisationMethod(
static_cast<FileSerialisationMethod>(method));
case FileIngestionMethod::Git:
return "git";
default: default:
abort(); abort();
} }
@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
void dumpPath( void dumpPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
Sink & sink, Sink & sink,
FileIngestionMethod method, FileSerialisationMethod method,
PathFilter & filter) PathFilter & filter)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileSerialisationMethod::Flat:
accessor.readFile(path, sink); accessor.readFile(path, sink);
break; break;
case FileIngestionMethod::Recursive: case FileSerialisationMethod::Recursive:
accessor.dumpPath(path, sink, filter); accessor.dumpPath(path, sink, filter);
break; break;
} }
@ -48,13 +87,13 @@ void dumpPath(
void restorePath( void restorePath(
const Path & path, const Path & path,
Source & source, Source & source,
FileIngestionMethod method) FileSerialisationMethod method)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileSerialisationMethod::Flat:
writeFile(path, source); writeFile(path, source);
break; break;
case FileIngestionMethod::Recursive: case FileSerialisationMethod::Recursive:
restorePath(path, source); restorePath(path, source);
break; break;
} }
@ -63,7 +102,7 @@ void restorePath(
HashResult hashPath( HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha, FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter) PathFilter & filter)
{ {
HashSink sink { ha }; HashSink sink { ha };
@ -71,4 +110,20 @@ HashResult hashPath(
return sink.finish(); return sink.finish();
} }
Hash hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ht,
PathFilter & filter)
{
switch (method) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
return hashPath(accessor, path, (FileSerialisationMethod) method, ht, filter).first;
case FileIngestionMethod::Git:
return git::dumpHash(ht, accessor, path, filter).hash;
}
}
} }


@ -8,37 +8,38 @@
namespace nix { namespace nix {
/** /**
* An enumeration of the main ways we can serialize file system * An enumeration of the ways we can serialize file system
* objects. * objects.
*/ */
enum struct FileIngestionMethod : uint8_t { enum struct FileSerialisationMethod : uint8_t {
/** /**
* Flat-file hashing. Directly ingest the contents of a single file * Flat-file. The contents of a single file exactly.
*/ */
Flat = 0, Flat,
/** /**
* Recursive (or NAR) hashing. Serializes the file-system object in * Nix Archive. Serializes the file-system object in
* Nix Archive format and ingest that. * Nix Archive format.
*/ */
Recursive = 1, Recursive,
}; };
/** /**
* Parse a `FileIngestionMethod` by name. Choice of: * Parse a `FileSerialisationMethod` by name. Choice of:
* *
* - `flat`: `FileIngestionMethod::Flat` * - `flat`: `FileSerialisationMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive` * - `nar`: `FileSerialisationMethod::Recursive`
* *
* Oppostite of `renderFileIngestionMethod`. * Opposite of `renderFileSerialisationMethod`.
*/ */
FileIngestionMethod parseFileIngestionMethod(std::string_view input); FileSerialisationMethod parseFileSerialisationMethod(std::string_view input);
/** /**
* Render a `FileIngestionMethod` by name. * Render a `FileSerialisationMethod` by name.
* *
* Oppostite of `parseFileIngestionMethod`. * Opposite of `parseFileSerialisationMethod`.
*/ */
std::string_view renderFileIngestionMethod(FileIngestionMethod method); std::string_view renderFileSerialisationMethod(FileSerialisationMethod method);
/** /**
* Dump a serialization of the given file system object. * Dump a serialization of the given file system object.
@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method);
void dumpPath( void dumpPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
Sink & sink, Sink & sink,
FileIngestionMethod method, FileSerialisationMethod method,
PathFilter & filter = defaultPathFilter); PathFilter & filter = defaultPathFilter);
/** /**
* Restore a serialization of the given file system object. * Restore a serialisation of the given file system object.
* *
* @TODO use an arbitrary `FileSystemObjectSink`. * @TODO use an arbitrary `FileSystemObjectSink`.
*/ */
void restorePath( void restorePath(
const Path & path, const Path & path,
Source & source, Source & source,
FileIngestionMethod method); FileSerialisationMethod method);
/** /**
* Compute the hash of the given file system object according to the * Compute the hash of the given file system object according to the
* given method. * given method.
* *
* The hash is defined as (essentially) hashString(ha, dumpPath(path)). * The hash is defined as (in pseudocode):
*
* ```
* hashString(ha, dumpPath(...))
* ```
*/ */
HashResult hashPath( HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);
/**
* An enumeration of the ways we can ingest file system
* objects, producing a hash or digest.
*/
enum struct FileIngestionMethod : uint8_t {
/**
* Hash `FileSerialisationMethod::Flat` serialisation.
*/
Flat,
/**
* Hash `FileSerialisationMethod::Recursive` serialisation.
*/
Recursive,
/**
* Git hashing. In particular files are hashed as git "blobs", and
* directories are hashed as git "trees".
*
* Unlike `Flat` and `Recursive`, this is not a hash of a single
* serialisation but a [Merkle
* DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple
* rounds of serialisation and hashing.
*
* @note Git's data model is slightly different, in that a plain
* file doesn't have an executable bit, directory entries do
* instead. We decide to treat a bare file as non-executable by fiat,
* as we do with `FileIngestionMethod::Flat` which also lacks this
* information. Thus, Git can encode some but not all of Nix's "File
* System Objects", and this sort of hashing is likewise partial.
*/
Git,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `git`: `FileIngestionMethod::Git`
*
* Opposite of `renderFileIngestionMethod`.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
*
* Opposite of `parseFileIngestionMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* Unlike the other `hashPath`, this works on an arbitrary
* `FileIngestionMethod` instead of `FileSerialisationMethod`, but
* doesn't return the size, as that is not both simply and usefully
* defined for a Merkle format.
*/
Hash hashPath(
SourceAccessor & accessor, const CanonPath & path, SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha, FileIngestionMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter); PathFilter & filter = defaultPathFilter);
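
As a usage illustration of the new `Hash`-returning `hashPath` overload declared above, a minimal sketch (assuming the headers named below and an on-disk path to hash) that computes the Git blob/tree hash of a local path; the functional test `simple.sh` later in this diff checks the same property end to end against real `git`:

```cpp
// Sketch only: hash an on-disk path with the Git ingestion method added here.
#include "file-content-address.hh"
#include "posix-source-accessor.hh"

using namespace nix;

Hash gitHashOfPath(const std::string & path)
{
    auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
    return hashPath(
        accessor, canonPath,
        FileIngestionMethod::Git, HashAlgorithm::SHA1);
}
```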


@ -113,7 +113,7 @@ bool createUserEnv(EvalState & state, PackageInfos & elems,
std::string str2 = str.str(); std::string str2 = str.str();
StringSource source { str2 }; StringSource source { str2 };
state.store->addToStoreFromDump( state.store->addToStoreFromDump(
source, "env-manifest.nix", TextIngestionMethod {}, HashAlgorithm::SHA256, references); source, "env-manifest.nix", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references);
}); });
/* Get the environment builder expression. */ /* Get the environment builder expression. */


@ -555,7 +555,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise)
HashResult hash = hashPath( HashResult hash = hashPath(
*store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) }, *store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256); FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
info->narHash = hash.first; info->narHash = hash.first;
info->narSize = hash.second; info->narSize = hash.second;
} }


@ -2,6 +2,7 @@
#include "common-args.hh" #include "common-args.hh"
#include "store-api.hh" #include "store-api.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh" #include "posix-source-accessor.hh"
#include "misc-store-flags.hh" #include "misc-store-flags.hh"


@ -226,7 +226,7 @@ static StorePath getDerivationEnvironment(ref<Store> store, ref<Store> evalStore
auto getEnvShPath = ({ auto getEnvShPath = ({
StringSource source { getEnvSh }; StringSource source { getEnvSh };
evalStore->addToStoreFromDump( evalStore->addToStoreFromDump(
source, "get-env.sh", TextIngestionMethod {}, HashAlgorithm::SHA256, {}); source, "get-env.sh", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, {});
}); });
drv.args = {store->printStorePath(getEnvShPath)}; drv.args = {store->printStorePath(getEnvShPath)};


@ -5,6 +5,7 @@
#include "shared.hh" #include "shared.hh"
#include "references.hh" #include "references.hh"
#include "archive.hh" #include "archive.hh"
#include "git.hh"
#include "posix-source-accessor.hh" #include "posix-source-accessor.hh"
#include "misc-store-flags.hh" #include "misc-store-flags.hh"
@ -66,9 +67,11 @@ struct CmdHashBase : Command
{ {
switch (mode) { switch (mode) {
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
return "print cryptographic hash of a regular file"; return "print cryptographic hash of a regular file";
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive:
return "print cryptographic hash of the NAR serialisation of a path"; return "print cryptographic hash of the NAR serialisation of a path";
case FileIngestionMethod::Git:
return "print cryptographic hash of the Git serialisation of a path";
default: default:
assert(false); assert(false);
}; };
@ -77,17 +80,41 @@ struct CmdHashBase : Command
void run() override void run() override
{ {
for (auto path : paths) { for (auto path : paths) {
auto makeSink = [&]() -> std::unique_ptr<AbstractHashSink> {
if (modulus)
return std::make_unique<HashModuloSink>(hashAlgo, *modulus);
else
return std::make_unique<HashSink>(hashAlgo);
};
std::unique_ptr<AbstractHashSink> hashSink; auto [accessor_, canonPath] = PosixSourceAccessor::createAtRoot(path);
if (modulus) auto & accessor = accessor_;
hashSink = std::make_unique<HashModuloSink>(hashAlgo, *modulus); Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
else switch (mode) {
hashSink = std::make_unique<HashSink>(hashAlgo); case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
auto hashSink = makeSink();
dumpPath(accessor, canonPath, *hashSink, (FileSerialisationMethod) mode);
h = hashSink->finish().first;
break;
}
case FileIngestionMethod::Git: {
std::function<git::DumpHook> hook;
hook = [&](const CanonPath & path) -> git::TreeEntry {
auto hashSink = makeSink();
auto mode = dump(accessor, path, *hashSink, hook);
auto hash = hashSink->finish().first;
return {
.mode = mode,
.hash = hash,
};
};
h = hook(canonPath).hash;
break;
}
}
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
dumpPath(accessor, canonPath, *hashSink, mode);
Hash h = hashSink->finish().first;
if (truncate && h.hashSize > 20) h = compressHash(h, 20); if (truncate && h.hashSize > 20) h = compressHash(h, 20);
logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI)); logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI));
} }


@ -0,0 +1,11 @@
source ../common.sh
clearStore
clearCache
# Need backend to support git-hashing too
requireDaemonNewerThan "2.18.0pre20230908"
enableFeatures "git-hashing"
restartDaemon


@ -0,0 +1,7 @@
git-hashing-tests := \
$(d)/simple.sh
install-tests-groups += git-hashing
clean-files += \
$(d)/config.nix


@ -0,0 +1,58 @@
source common.sh
repo="$TEST_ROOT/scratch"
git init "$repo"
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
try () {
hash=$(nix hash path --mode git --format base16 --algo sha1 $TEST_ROOT/hash-path)
[[ "$hash" == "$1" ]]
git -C "$repo" rm -rf hash-path || true
cp -r "$TEST_ROOT/hash-path" "$TEST_ROOT/scratch/hash-path"
git -C "$repo" add hash-path
git -C "$repo" commit -m "x"
git -C "$repo" status
hash2=$(git -C "$TEST_ROOT/scratch" rev-parse HEAD:hash-path)
[[ "$hash2" = "$1" ]]
}
# blob
rm -rf $TEST_ROOT/hash-path
echo "Hello World" > $TEST_ROOT/hash-path
try "557db03de997c86a4a028e1ebd3a1ceb225be238"
# tree with children
rm -rf $TEST_ROOT/hash-path
mkdir $TEST_ROOT/hash-path
echo "Hello World" > $TEST_ROOT/hash-path/hello
echo "Run Hello World" > $TEST_ROOT/hash-path/executable
chmod +x $TEST_ROOT/hash-path/executable
try "e5c0a11a556801a5c9dcf330ca9d7e2c572697f4"
rm -rf $TEST_ROOT/dummy1
echo Hello World! > $TEST_ROOT/dummy1
path1=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy1)
hash1=$(nix-store -q --hash $path1)
test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v"
rm -rf $TEST_ROOT/dummy2
mkdir -p $TEST_ROOT/dummy2
echo Hello World! > $TEST_ROOT/dummy2/hello
path2=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy2)
hash2=$(nix-store -q --hash $path2)
test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0"
rm -rf $TEST_ROOT/dummy3
mkdir -p $TEST_ROOT/dummy3
mkdir -p $TEST_ROOT/dummy3/dir
touch $TEST_ROOT/dummy3/dir/file
echo Hello World! > $TEST_ROOT/dummy3/dir/file
touch $TEST_ROOT/dummy3/dir/executable
chmod +x $TEST_ROOT/dummy3/dir/executable
echo Run Hello World! > $TEST_ROOT/dummy3/dir/executable
path3=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy3)
hash3=$(nix-store -q --hash $path3)
test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv"


@ -13,6 +13,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_1) {
ContentAddressMethod { TextIngestionMethod {} }, ContentAddressMethod { TextIngestionMethod {} },
ContentAddressMethod { FileIngestionMethod::Flat }, ContentAddressMethod { FileIngestionMethod::Flat },
ContentAddressMethod { FileIngestionMethod::Recursive }, ContentAddressMethod { FileIngestionMethod::Recursive },
ContentAddressMethod { FileIngestionMethod::Git },
}) { }) {
EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam); EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam);
} }
@ -23,6 +24,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_2) {
"text", "text",
"flat", "flat",
"nar", "nar",
"git",
}) { }) {
EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS); EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS);
} }


@ -4,6 +4,32 @@
namespace nix { namespace nix {
/* ----------------------------------------------------------------------------
* parseFileSerialisationMethod, renderFileSerialisationMethod
* --------------------------------------------------------------------------*/
TEST(FileSerialisationMethod, testRoundTripPrintParse_1) {
for (const FileSerialisationMethod fim : {
FileSerialisationMethod::Flat,
FileSerialisationMethod::Recursive,
}) {
EXPECT_EQ(parseFileSerialisationMethod(renderFileSerialisationMethod(fim)), fim);
}
}
TEST(FileSerialisationMethod, testRoundTripPrintParse_2) {
for (const std::string_view fimS : {
"flat",
"nar",
}) {
EXPECT_EQ(renderFileSerialisationMethod(parseFileSerialisationMethod(fimS)), fimS);
}
}
TEST(FileSerialisationMethod, testParseFileSerialisationMethodOptException) {
EXPECT_THROW(parseFileSerialisationMethod("narwhal"), UsageError);
}
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* parseFileIngestionMethod, renderFileIngestionMethod * parseFileIngestionMethod, renderFileIngestionMethod
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
@ -12,6 +38,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_1) {
for (const FileIngestionMethod fim : { for (const FileIngestionMethod fim : {
FileIngestionMethod::Flat, FileIngestionMethod::Flat,
FileIngestionMethod::Recursive, FileIngestionMethod::Recursive,
FileIngestionMethod::Git,
}) { }) {
EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim); EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim);
} }
@ -21,6 +48,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_2) {
for (const std::string_view fimS : { for (const std::string_view fimS : {
"flat", "flat",
"nar", "nar",
"git",
}) { }) {
EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS); EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS);
} }