From dbe1b51580451bcb08ccc1c768f7f0f4f0e9f421 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 7 May 2024 19:25:44 +0200 Subject: [PATCH 1/7] Add setting to warn about copying/hashing large paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is useful for diagnosing whether an evaluation is copying large paths to the store. Example: $ nix build .#packages.x86_64-linux.default --large-path-warning-threshold 1000000 warning: copied large path '/home/eelco/Dev/nix-master/' to the store (6271792 bytes) warning: copied large path '«github:NixOS/nixpkgs/b550fe4b4776908ac2a861124307045f8e717c8e?narHash=sha256-7kkJQd4rZ%2BvFrzWu8sTRtta5D1kBG0LSRYAfhtmMlSo%3D»/' to the store (155263768 bytes) warning: copied large path '«github:libgit2/libgit2/45fd9ed7ae1a9b74b957ef4f337bc3c8b3df01b5?narHash=sha256-oX4Z3S9WtJlwvj0uH9HlYcWv%2Bx1hqp8mhXl7HsLu2f0%3D»/' to the store (22175416 bytes) warning: copied large path '/nix/store/z985088mcd6w23qwdlirsinnyzayagki-source' to the store (5885872 bytes) --- perl/lib/Nix/Store.xs | 2 +- src/libstore/binary-cache-store.cc | 2 +- src/libstore/globals.hh | 10 ++++++++++ src/libstore/local-store.cc | 4 ++-- src/libstore/store-api.cc | 10 ++++++++-- src/libstore/unix/build/worker.cc | 4 ++-- src/libutil/file-content-address.cc | 10 ++++++---- src/libutil/file-content-address.hh | 7 ++++--- src/libutil/serialise.hh | 20 ++++++++++++++++++++ 9 files changed, 54 insertions(+), 15 deletions(-) diff --git a/perl/lib/Nix/Store.xs b/perl/lib/Nix/Store.xs index ee211ef64..e751c2be1 100644 --- a/perl/lib/Nix/Store.xs +++ b/perl/lib/Nix/Store.xs @@ -258,7 +258,7 @@ hashPath(char * algo, int base32, char * path) try { Hash h = hashPath( PosixSourceAccessor::createAtRoot(path), - FileIngestionMethod::Recursive, parseHashAlgo(algo)); + FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); } catch (Error & e) { diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 5153ca64f..67d00f364 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -453,7 +453,7 @@ StorePath BinaryCacheStore::addToStore( non-recursive+sha256 so we can just use the default implementation of this method in terms of addToStoreFromDump. */ - auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter).first; auto source = sinkToSource([&](Sink & sink) { path.dumpPath(sink, filter); diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 108933422..dc53a07f1 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -1262,6 +1262,16 @@ public: store paths of the latest Nix release. )" }; + + Setting largePathWarningThreshold{ + this, + std::numeric_limits::max(), + "large-path-warning-threshold", + R"( + Warn when copying a path larger than this number of bytes to the Nix store + (as determined by its NAR serialisation). + )" + }; }; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 800e69309..33c4d7372 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1272,7 +1272,7 @@ StorePath LocalStore::addToStoreFromDump( ? dumpHash : hashPath( PosixSourceAccessor::createAtRoot(tempPath), - hashMethod.getFileIngestionMethod(), hashAlgo), + hashMethod.getFileIngestionMethod(), hashAlgo).first, { .others = references, // caller is not capable of creating a self-reference, because this is content-addressed without modulus @@ -1412,7 +1412,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) PosixSourceAccessor accessor; std::string hash = hashPath( PosixSourceAccessor::createAtRoot(link.path()), - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false); + FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); if (hash != name.string()) { printError("link '%s' was modified! expected hash '%s', got '%s'", link.path(), name, hash); diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 419c55e92..a2095e02e 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -169,7 +169,9 @@ std::pair StoreDirConfig::computeStorePath( const StorePathSet & references, PathFilter & filter) const { - auto h = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + auto [h, size] = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); + if (size && *size >= settings.largePathWarningThreshold) + warn("hashed large path '%s' (%d bytes)", path, *size); return { makeFixedOutputPathFromCA( name, @@ -210,7 +212,11 @@ StorePath Store::addToStore( auto source = sinkToSource([&](Sink & sink) { dumpPath(path, sink, fsm, filter); }); - return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); + LengthSource lengthSource(*source); + auto storePath = addToStoreFromDump(lengthSource, name, fsm, method, hashAlgo, references, repair); + if (lengthSource.total >= settings.largePathWarningThreshold) + warn("copied large path '%s' to the store (%d bytes)", path, lengthSource.total); + return storePath; } void Store::addMultipleToStore( diff --git a/src/libstore/unix/build/worker.cc b/src/libstore/unix/build/worker.cc index 03fc280a4..2cca06213 100644 --- a/src/libstore/unix/build/worker.cc +++ b/src/libstore/unix/build/worker.cc @@ -529,9 +529,9 @@ bool Worker::pathContentsGood(const StorePath & path) if (!pathExists(store.printStorePath(path))) res = false; else { - Hash current = hashPath( + auto current = hashPath( {store.getFSAccessor(), CanonPath(store.printStorePath(path))}, - FileIngestionMethod::Recursive, info->narHash.algo); + FileIngestionMethod::Recursive, info->narHash.algo).first; Hash nullHash(HashAlgorithm::SHA256); res = info->narHash == nullHash || info->narHash == current; } diff --git a/src/libutil/file-content-address.cc b/src/libutil/file-content-address.cc index 769042d00..8b1e3117a 100644 --- a/src/libutil/file-content-address.cc +++ b/src/libutil/file-content-address.cc @@ -112,17 +112,19 @@ HashResult hashPath( } -Hash hashPath( +std::pair> hashPath( const SourcePath & path, FileIngestionMethod method, HashAlgorithm ht, PathFilter & filter) { switch (method) { case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - return hashPath(path, (FileSerialisationMethod) method, ht, filter).first; + case FileIngestionMethod::Recursive: { + auto res = hashPath(path, (FileSerialisationMethod) method, ht, filter); + return {res.first, {res.second}}; + } case FileIngestionMethod::Git: - return git::dumpHash(ht, path, filter).hash; + return {git::dumpHash(ht, path, filter).hash, std::nullopt}; } assert(false); } diff --git a/src/libutil/file-content-address.hh b/src/libutil/file-content-address.hh index 145a8fb1f..cd63be551 100644 --- a/src/libutil/file-content-address.hh +++ b/src/libutil/file-content-address.hh @@ -132,14 +132,15 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method); /** * Compute the hash of the given file system object according to the - * given method. + * given method, and for some ingestion methods, the size of the + * serialisation. * * Unlike the other `hashPath`, this works on an arbitrary * `FileIngestionMethod` instead of `FileSerialisationMethod`, but - * doesn't return the size as this is this is not a both simple and + * may not return the size as this is this is not a both simple and * useful defined for a merkle format. */ -Hash hashPath( +std::pair> hashPath( const SourcePath & path, FileIngestionMethod method, HashAlgorithm ha, PathFilter & filter = defaultPathFilter); diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 6249ddaf5..18f4a79c3 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -283,6 +283,26 @@ struct LengthSink : Sink } }; +/** + * A wrapper source that counts the number of bytes read from it. + */ +struct LengthSource : Source +{ + Source & next; + + LengthSource(Source & next) : next(next) + { } + + uint64_t total = 0; + + size_t read(char * data, size_t len) override + { + auto n = next.read(data, len); + total += n; + return n; + } +}; + /** * Convert a function into a sink. */ From 5314430437d117d2e041b87cc568702c66f8702a Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 10 May 2024 16:49:40 +0200 Subject: [PATCH 2/7] Move printSize() into libutil Also always include the unit (i.e. "MiB" instead of "M"). --- src/libutil/util.cc | 15 +++++++++++++++ src/libutil/util.hh | 6 ++++++ src/nix/path-info.cc | 17 +++-------------- src/nix/path-info.md | 4 ++-- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 103ce4232..f893fc12d 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -112,6 +112,21 @@ std::string rewriteStrings(std::string s, const StringMap & rewrites) } +std::string renderSize(uint64_t value) +{ + static const std::array prefixes{{ + 'K', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' + }}; + size_t power = 0; + double res = value; + while (res > 1024 && power < prefixes.size()) { + ++power; + res /= 1024; + } + return fmt("%6.1f %ciB", power == 0 ? res / 1024 : res, prefixes.at(power)); +} + + bool hasPrefix(std::string_view s, std::string_view prefix) { return s.compare(0, prefix.size(), prefix) == 0; diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 8b049875a..01e42ce57 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -137,6 +137,12 @@ N string2IntWithUnitPrefix(std::string_view s) throw UsageError("'%s' is not an integer", s); } +/** + * Pretty-print a byte value, e.g. 12433615056 is rendered as `11.6 + * GiB`. + */ +std::string renderSize(uint64_t value); + /** * Parse a string into a float. */ diff --git a/src/nix/path-info.cc b/src/nix/path-info.cc index 921b25d7f..a1a2c40f4 100644 --- a/src/nix/path-info.cc +++ b/src/nix/path-info.cc @@ -139,21 +139,10 @@ struct CmdPathInfo : StorePathsCommand, MixJSON void printSize(uint64_t value) { - if (!humanReadable) { + if (humanReadable) + std::cout << fmt("\t%s", renderSize(value)); + else std::cout << fmt("\t%11d", value); - return; - } - - static const std::array idents{{ - ' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' - }}; - size_t power = 0; - double res = value; - while (res > 1024 && power < idents.size()) { - ++power; - res /= 1024; - } - std::cout << fmt("\t%6.1f%c", res, idents.at(power)); } void run(ref store, StorePaths && storePaths) override diff --git a/src/nix/path-info.md b/src/nix/path-info.md index 789984559..2e39225b8 100644 --- a/src/nix/path-info.md +++ b/src/nix/path-info.md @@ -26,8 +26,8 @@ R""( ```console # nix path-info --recursive --size --closure-size --human-readable nixpkgs#rustc - /nix/store/01rrgsg5zk3cds0xgdsq40zpk6g51dz9-ncurses-6.2-dev 386.7K 69.1M - /nix/store/0q783wnvixpqz6dxjp16nw296avgczam-libpfm-4.11.0 5.9M 37.4M + /nix/store/01rrgsg5zk3cds0xgdsq40zpk6g51dz9-ncurses-6.2-dev 386.7 KiB 69.1 MiB + /nix/store/0q783wnvixpqz6dxjp16nw296avgczam-libpfm-4.11.0 5.9 MiB 37.4 MiB … ``` From cf3b044b7efb67f82d151da66b339a9ad8c1e5ac Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 10 May 2024 16:58:19 +0200 Subject: [PATCH 3/7] Make large path warnings human-readable --- src/libstore/store-api.cc | 4 ++-- src/libutil/util.cc | 4 ++-- src/libutil/util.hh | 5 +++-- src/nix/path-info.cc | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index a2095e02e..0b78f999e 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -171,7 +171,7 @@ std::pair StoreDirConfig::computeStorePath( { auto [h, size] = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); if (size && *size >= settings.largePathWarningThreshold) - warn("hashed large path '%s' (%d bytes)", path, *size); + warn("hashed large path '%s' (%s)", path, renderSize(*size)); return { makeFixedOutputPathFromCA( name, @@ -215,7 +215,7 @@ StorePath Store::addToStore( LengthSource lengthSource(*source); auto storePath = addToStoreFromDump(lengthSource, name, fsm, method, hashAlgo, references, repair); if (lengthSource.total >= settings.largePathWarningThreshold) - warn("copied large path '%s' to the store (%d bytes)", path, lengthSource.total); + warn("copied large path '%s' to the store (%s)", path, renderSize(lengthSource.total)); return storePath; } diff --git a/src/libutil/util.cc b/src/libutil/util.cc index f893fc12d..16bca093c 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -112,7 +112,7 @@ std::string rewriteStrings(std::string s, const StringMap & rewrites) } -std::string renderSize(uint64_t value) +std::string renderSize(uint64_t value, bool align) { static const std::array prefixes{{ 'K', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' @@ -123,7 +123,7 @@ std::string renderSize(uint64_t value) ++power; res /= 1024; } - return fmt("%6.1f %ciB", power == 0 ? res / 1024 : res, prefixes.at(power)); + return fmt(align ? "%6.1f %ciB" : "%.1f %ciB", power == 0 ? res / 1024 : res, prefixes.at(power)); } diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 01e42ce57..0c8c82bfd 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -139,9 +139,10 @@ N string2IntWithUnitPrefix(std::string_view s) /** * Pretty-print a byte value, e.g. 12433615056 is rendered as `11.6 - * GiB`. + * GiB`. If `align` is set, the number will be right-justified + * (e.g. `__11.6 GiB`). */ -std::string renderSize(uint64_t value); +std::string renderSize(uint64_t value, bool align = false); /** * Parse a string into a float. diff --git a/src/nix/path-info.cc b/src/nix/path-info.cc index a1a2c40f4..47f9baee5 100644 --- a/src/nix/path-info.cc +++ b/src/nix/path-info.cc @@ -140,7 +140,7 @@ struct CmdPathInfo : StorePathsCommand, MixJSON void printSize(uint64_t value) { if (humanReadable) - std::cout << fmt("\t%s", renderSize(value)); + std::cout << fmt("\t%s", renderSize(value, true)); else std::cout << fmt("\t%11d", value); } From f0b5628eb2ff90bf6e7009966d1fad39214c303d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 13 May 2024 12:08:45 +0200 Subject: [PATCH 4/7] renderSize(): Add some unit tests --- tests/unit/libutil/tests.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/libutil/tests.cc b/tests/unit/libutil/tests.cc index b66872a6e..9be4a400d 100644 --- a/tests/unit/libutil/tests.cc +++ b/tests/unit/libutil/tests.cc @@ -421,6 +421,23 @@ namespace nix { ASSERT_EQ(string2Int("-100"), -100); } + /* ---------------------------------------------------------------------------- + * renderSize + * --------------------------------------------------------------------------*/ + + TEST(renderSize, misc) { + ASSERT_EQ(renderSize(0, true), " 0.0 KiB"); + ASSERT_EQ(renderSize(100, true), " 0.1 KiB"); + ASSERT_EQ(renderSize(100), "0.1 KiB"); + ASSERT_EQ(renderSize(972, true), " 0.9 KiB"); + ASSERT_EQ(renderSize(973, true), " 1.0 KiB"); // FIXME: should round down + ASSERT_EQ(renderSize(1024, true), " 1.0 KiB"); + ASSERT_EQ(renderSize(1024 * 1024, true), "1024.0 KiB"); + ASSERT_EQ(renderSize(1100 * 1024, true), " 1.1 MiB"); + ASSERT_EQ(renderSize(2ULL * 1024 * 1024 * 1024, true), " 2.0 GiB"); + ASSERT_EQ(renderSize(2100ULL * 1024 * 1024 * 1024, true), " 2.1 TiB"); + } + #ifndef _WIN32 // TODO re-enable on Windows, once we can start processes /* ---------------------------------------------------------------------------- * statusOk From 553468216636a0037f988a31f95cda40a0e7e3d0 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 13 May 2024 10:29:35 +0200 Subject: [PATCH 5/7] Update src/libutil/util.hh Co-authored-by: Robert Hensing --- src/libutil/util.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 0c8c82bfd..6db59ef20 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -139,8 +139,8 @@ N string2IntWithUnitPrefix(std::string_view s) /** * Pretty-print a byte value, e.g. 12433615056 is rendered as `11.6 - * GiB`. If `align` is set, the number will be right-justified - * (e.g. `__11.6 GiB`). + * GiB`. If `align` is set, the number will be right-justified by + * padding with spaces on the left. */ std::string renderSize(uint64_t value, bool align = false); From deac00c6d0a019874016021a74e3d42306afd2e6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 3 Jun 2024 15:49:15 +0200 Subject: [PATCH 6/7] Rename large-path-warning-threshold -> warn-large-path-threshold --- src/libstore/globals.hh | 4 ++-- src/libstore/store-api.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 1f3548497..843e77bcf 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -1263,10 +1263,10 @@ public: )" }; - Setting largePathWarningThreshold{ + Setting warnLargePathThreshold{ this, std::numeric_limits::max(), - "large-path-warning-threshold", + "warn-large-path-threshold", R"( Warn when copying a path larger than this number of bytes to the Nix store (as determined by its NAR serialisation). diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 9b519dd84..c67ccd7d4 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -168,7 +168,7 @@ std::pair StoreDirConfig::computeStorePath( PathFilter & filter) const { auto [h, size] = hashPath(path, method.getFileIngestionMethod(), hashAlgo, filter); - if (size && *size >= settings.largePathWarningThreshold) + if (size && *size >= settings.warnLargePathThreshold) warn("hashed large path '%s' (%s)", path, renderSize(*size)); return { makeFixedOutputPathFromCA( @@ -212,7 +212,7 @@ StorePath Store::addToStore( }); LengthSource lengthSource(*source); auto storePath = addToStoreFromDump(lengthSource, name, fsm, method, hashAlgo, references, repair); - if (lengthSource.total >= settings.largePathWarningThreshold) + if (lengthSource.total >= settings.warnLargePathThreshold) warn("copied large path '%s' to the store (%s)", path, renderSize(lengthSource.total)); return storePath; } From d2bfc7e55a8e83964461698f9fbc90b927ef427e Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 3 Jun 2024 15:55:19 +0200 Subject: [PATCH 7/7] Add release note --- doc/manual/rl-next/warn-large-path.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 doc/manual/rl-next/warn-large-path.md diff --git a/doc/manual/rl-next/warn-large-path.md b/doc/manual/rl-next/warn-large-path.md new file mode 100644 index 000000000..5ee3e307f --- /dev/null +++ b/doc/manual/rl-next/warn-large-path.md @@ -0,0 +1,11 @@ +--- +synopsis: Large path warnings +prs: 10661 +--- + +Nix can now warn when evaluation of a Nix expression causes a large +path to be copied to the Nix store. The threshold for this warning can +be configured using [the `warn-large-path-threshold` +setting](@docroot@/command-ref/conf-file.md#warn-large-path-threshold), +e.g. `--warn-large-path-threshold 100M` will warn about paths larger +than 100 MiB.