Merge pull request #10661 from edolstra/large-path-warning

Add setting to warn about copying/hashing large paths
This commit is contained in:
Eelco Dolstra 2024-06-03 17:04:43 +02:00 committed by GitHub
commit a0e35d92d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 109 additions and 31 deletions

View file

@ -0,0 +1,11 @@
---
synopsis: Large path warnings
prs: 10661
---
Nix can now warn when evaluation of a Nix expression causes a large
path to be hashed or copied to the Nix store. The threshold for this
warning can be configured using [the `warn-large-path-threshold`
setting](@docroot@/command-ref/conf-file.md#warn-large-path-threshold),
e.g. `--warn-large-path-threshold 100M` will warn about paths of
100 MiB or larger.

View file

@ -258,7 +258,7 @@ hashPath(char * algo, int base32, char * path)
try { try {
Hash h = hashPath( Hash h = hashPath(
PosixSourceAccessor::createAtRoot(path), PosixSourceAccessor::createAtRoot(path),
FileIngestionMethod::Recursive, parseHashAlgo(algo)); FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) { } catch (Error & e) {

View file

@ -454,7 +454,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */ implementation of this method in terms of addToStoreFromDump. */
auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
path.dumpPath(sink, filter); path.dumpPath(sink, filter);

View file

@ -528,9 +528,9 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path))) if (!pathExists(store.printStorePath(path)))
res = false; res = false;
else { else {
Hash current = hashPath( auto current = hashPath(
{store.getFSAccessor(), CanonPath(store.printStorePath(path))}, {store.getFSAccessor(), CanonPath(store.printStorePath(path))},
FileIngestionMethod::Recursive, info->narHash.algo); FileIngestionMethod::Recursive, info->narHash.algo).first;
Hash nullHash(HashAlgorithm::SHA256); Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current; res = info->narHash == nullHash || info->narHash == current;
} }

View file

@ -1262,6 +1262,16 @@ public:
store paths of the latest Nix release. store paths of the latest Nix release.
)" )"
}; };
/**
 * Size threshold, in bytes, used for large-path warnings. The
 * default is the largest value a `uint64_t` can hold.
 * NOTE(review): presumably that default leaves the warning
 * effectively disabled until the user lowers it — confirm against
 * the call sites that compare sizes with this setting.
 */
Setting<uint64_t> warnLargePathThreshold{
this,
std::numeric_limits<uint64_t>::max(),
"warn-large-path-threshold",
R"(
Warn when copying a path larger than this number of bytes to the Nix store
(as determined by its NAR serialisation).
)"
};
}; };

View file

@ -1282,7 +1282,7 @@ StorePath LocalStore::addToStoreFromDump(
? dumpHash ? dumpHash
: hashPath( : hashPath(
PosixSourceAccessor::createAtRoot(tempPath), PosixSourceAccessor::createAtRoot(tempPath),
hashMethod.getFileIngestionMethod(), hashAlgo), hashMethod.getFileIngestionMethod(), hashAlgo).first,
{ {
.others = references, .others = references,
// caller is not capable of creating a self-reference, because this is content-addressed without modulus // caller is not capable of creating a self-reference, because this is content-addressed without modulus
@ -1422,7 +1422,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor; PosixSourceAccessor accessor;
std::string hash = hashPath( std::string hash = hashPath(
PosixSourceAccessor::createAtRoot(link.path()), PosixSourceAccessor::createAtRoot(link.path()),
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false); FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false);
if (hash != name.string()) { if (hash != name.string()) {
printError("link '%s' was modified! expected hash '%s', got '%s'", printError("link '%s' was modified! expected hash '%s', got '%s'",
link.path(), name, hash); link.path(), name, hash);

View file

@ -167,7 +167,9 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath(
const StorePathSet & references, const StorePathSet & references,
PathFilter & filter) const PathFilter & filter) const
{ {
auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); auto [h, size] = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter);
if (size && *size >= settings.warnLargePathThreshold)
warn("hashed large path '%s' (%s)", path, renderSize(*size));
return { return {
makeFixedOutputPathFromCA( makeFixedOutputPathFromCA(
name, name,
@ -208,7 +210,11 @@ StorePath Store::addToStore(
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & sink) {
dumpPath(path, sink, fsm, filter); dumpPath(path, sink, fsm, filter);
}); });
return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); LengthSource lengthSource(*source);
auto storePath = addToStoreFromDump(lengthSource, name, fsm, method, hashAlgo, references, repair);
if (lengthSource.total >= settings.warnLargePathThreshold)
warn("copied large path '%s' to the store (%s)", path, renderSize(lengthSource.total));
return storePath;
} }
void Store::addMultipleToStore( void Store::addMultipleToStore(

View file

@ -112,17 +112,19 @@ HashResult hashPath(
} }
Hash hashPath( std::pair<Hash, std::optional<uint64_t>> hashPath(
const SourcePath & path, const SourcePath & path,
FileIngestionMethod method, HashAlgorithm ht, FileIngestionMethod method, HashAlgorithm ht,
PathFilter & filter) PathFilter & filter)
{ {
switch (method) { switch (method) {
case FileIngestionMethod::Flat: case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive: case FileIngestionMethod::Recursive: {
return hashPath(path, (FileSerialisationMethod) method, ht, filter).first; auto res = hashPath(path, (FileSerialisationMethod) method, ht, filter);
return {res.first, {res.second}};
}
case FileIngestionMethod::Git: case FileIngestionMethod::Git:
return git::dumpHash(ht, path, filter).hash; return {git::dumpHash(ht, path, filter).hash, std::nullopt};
} }
assert(false); assert(false);
} }

View file

@ -143,14 +143,15 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method);
/** /**
* Compute the hash of the given file system object according to the * Compute the hash of the given file system object according to the
* given method. * given method, and for some ingestion methods, the size of the
* serialisation.
* *
* Unlike the other `hashPath`, this works on an arbitrary * Unlike the other `hashPath`, this works on an arbitrary
* `FileIngestionMethod` instead of `FileSerialisationMethod`, but * `FileIngestionMethod` instead of `FileSerialisationMethod`, but
* doesn't return the size as this is this is not a both simple and * may not return the size as this is this is not a both simple and
* useful defined for a merkle format. * useful defined for a merkle format.
*/ */
Hash hashPath( std::pair<Hash, std::optional<uint64_t>> hashPath(
const SourcePath & path, const SourcePath & path,
FileIngestionMethod method, HashAlgorithm ha, FileIngestionMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter); PathFilter & filter = defaultPathFilter);

View file

@ -283,6 +283,26 @@ struct LengthSink : Sink
} }
}; };
/**
 * A `Source` wrapper that forwards every read to an underlying
 * source and keeps a running count of the bytes returned by it.
 */
struct LengthSource : Source
{
    /** The wrapped source that actually provides the data. */
    Source & next;

    /** Number of bytes obtained from `next` through this wrapper so far. */
    uint64_t total = 0;

    LengthSource(Source & next)
        : next(next)
    {
    }

    /**
     * Read up to `len` bytes into `data` from the wrapped source,
     * add the number of bytes it reports to `total`, and return
     * that number.
     */
    size_t read(char * data, size_t len) override
    {
        const auto received = next.read(data, len);
        total += received;
        return received;
    }
};
/** /**
* Convert a function into a sink. * Convert a function into a sink.
*/ */

View file

@ -151,6 +151,21 @@ template std::optional<double> string2Float<double>(const std::string_view s);
template std::optional<float> string2Float<float>(const std::string_view s); template std::optional<float> string2Float<float>(const std::string_view s);
/**
 * Render a byte count as a human-readable size with one decimal and
 * a binary unit suffix (`KiB`, `MiB`, `GiB`, ...).
 *
 * Values of at most 1024 bytes are printed as a fraction of a KiB;
 * there is no plain-byte unit. When `align` is true the numeric part
 * is right-justified in a field of at least six characters.
 *
 * @param value Size in bytes.
 * @param align Whether to left-pad the number for column alignment.
 * @return The formatted size, e.g. `11.6 GiB`.
 */
std::string renderSize(uint64_t value, bool align)
{
    // Entry 0 stands in for "bytes", which are also shown in KiB (see
    // the division in the return statement); entry N >= 1 is the unit
    // letter for 1024^N bytes.
    static const std::array<char, 9> prefixes{{
        'K', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'
    }};
    size_t power = 0;
    double res = value;
    // Never step past the last table entry, so that the
    // `prefixes.at(power)` lookup below cannot go out of range.
    while (res > 1024 && power + 1 < prefixes.size()) {
        ++power;
        res /= 1024;
    }
    return fmt(align ? "%6.1f %ciB" : "%.1f %ciB", power == 0 ? res / 1024 : res, prefixes.at(power));
}
bool hasPrefix(std::string_view s, std::string_view prefix) bool hasPrefix(std::string_view s, std::string_view prefix)
{ {
return s.compare(0, prefix.size(), prefix) == 0; return s.compare(0, prefix.size(), prefix) == 0;

View file

@ -127,6 +127,13 @@ N string2IntWithUnitPrefix(std::string_view s)
throw UsageError("'%s' is not an integer", s); throw UsageError("'%s' is not an integer", s);
} }
/**
 * Pretty-print a byte value with one decimal and a binary unit
 * suffix, e.g. 12433615056 is rendered as `11.6 GiB`. The smallest
 * unit is KiB, so small values come out as a fraction of a KiB
 * (100 is rendered as `0.1 KiB`).
 *
 * If `align` is set, the number will be right-justified by padding
 * with spaces on the left to a width of at least six characters;
 * the unit suffix is not padded.
 */
std::string renderSize(uint64_t value, bool align = false);
/** /**
* Parse a string into a float. * Parse a string into a float.
*/ */

View file

@ -139,21 +139,10 @@ struct CmdPathInfo : StorePathsCommand, MixJSON
void printSize(uint64_t value) void printSize(uint64_t value)
{ {
if (!humanReadable) { if (humanReadable)
std::cout << fmt("\t%s", renderSize(value, true));
else
std::cout << fmt("\t%11d", value); std::cout << fmt("\t%11d", value);
return;
}
static const std::array<char, 9> idents{{
' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'
}};
size_t power = 0;
double res = value;
while (res > 1024 && power < idents.size()) {
++power;
res /= 1024;
}
std::cout << fmt("\t%6.1f%c", res, idents.at(power));
} }
void run(ref<Store> store, StorePaths && storePaths) override void run(ref<Store> store, StorePaths && storePaths) override

View file

@ -26,8 +26,8 @@ R""(
```console ```console
# nix path-info --recursive --size --closure-size --human-readable nixpkgs#rustc # nix path-info --recursive --size --closure-size --human-readable nixpkgs#rustc
/nix/store/01rrgsg5zk3cds0xgdsq40zpk6g51dz9-ncurses-6.2-dev 386.7K 69.1M /nix/store/01rrgsg5zk3cds0xgdsq40zpk6g51dz9-ncurses-6.2-dev 386.7 KiB 69.1 MiB
/nix/store/0q783wnvixpqz6dxjp16nw296avgczam-libpfm-4.11.0 5.9M 37.4M /nix/store/0q783wnvixpqz6dxjp16nw296avgczam-libpfm-4.11.0 5.9 MiB 37.4 MiB
``` ```

View file

@ -421,6 +421,23 @@ namespace nix {
ASSERT_EQ(string2Int<int>("-100"), -100); ASSERT_EQ(string2Int<int>("-100"), -100);
} }
/* ----------------------------------------------------------------------------
 * renderSize
 * --------------------------------------------------------------------------*/

TEST(renderSize, misc) {
    // With `align` set, the number is formatted with "%6.1f", i.e.
    // right-justified in a six-character field, so short numbers are
    // preceded by three spaces.
    ASSERT_EQ(renderSize(0, true), "   0.0 KiB");
    ASSERT_EQ(renderSize(100, true), "   0.1 KiB");
    // Without `align` (the default) there is no padding.
    ASSERT_EQ(renderSize(100), "0.1 KiB");
    ASSERT_EQ(renderSize(972, true), "   0.9 KiB");
    ASSERT_EQ(renderSize(973, true), "   1.0 KiB"); // FIXME: should round down
    ASSERT_EQ(renderSize(1024, true), "   1.0 KiB");
    // Exactly 1024 KiB is not promoted to the next unit; "1024.0"
    // already fills the six-character field.
    ASSERT_EQ(renderSize(1024 * 1024, true), "1024.0 KiB");
    ASSERT_EQ(renderSize(1100 * 1024, true), "   1.1 MiB");
    ASSERT_EQ(renderSize(2ULL * 1024 * 1024 * 1024, true), "   2.0 GiB");
    ASSERT_EQ(renderSize(2100ULL * 1024 * 1024 * 1024, true), "   2.1 TiB");
}
#ifndef _WIN32 // TODO re-enable on Windows, once we can start processes #ifndef _WIN32 // TODO re-enable on Windows, once we can start processes
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* statusOk * statusOk