From 99a691c8a1abffd30077bd5f005cb8d4bbafae5c Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 24 Dec 2023 21:14:08 +0100 Subject: [PATCH 1/5] don't use istreams in hot paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit istream sentry objects are very expensive for single-character operations, and since we don't configure exception masks for the istreams used here they don't even do anything. all we need is end-of-string checks and an advancing position in an immutable memory buffer, both of which can be had for much cheaper than istreams allow. the effect of this change is most apparent on empty stores. before: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 7.167 s ± 0.013 s [User: 5.528 s, System: 1.431 s] Range (min … max): 7.147 s … 7.182 s 10 runs after: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.963 s ± 0.011 s [User: 5.330 s, System: 1.421 s] Range (min … max): 6.943 s … 6.974 s 10 runs --- src/libstore/derivations.cc | 47 +++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 8a7d660ff..973ce5211 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -154,18 +154,39 @@ StorePath writeDerivation(Store & store, } -/* Read string `s' from stream `str'. */ -static void expect(std::istream & str, std::string_view s) -{ - for (auto & c : s) { - if (str.get() != c) - throw FormatError("expected string '%1%'", s); +namespace { +/** + * This mimics std::istream to some extent. We use this much smaller implementation + * instead of plain istreams because the sentry object overhead is too high. + */ +struct StringViewStream { + std::string_view remaining; + + int peek() const { + return remaining.empty() ? EOF : remaining[0]; } + + int get() { + if (remaining.empty()) return EOF; + char c = remaining[0]; + remaining.remove_prefix(1); + return c; + } +}; +} + + +/* Read string `s' from stream `str'. */ +static void expect(StringViewStream & str, std::string_view s) +{ + if (!str.remaining.starts_with(s)) + throw FormatError("expected string '%1%'", s); + str.remaining.remove_prefix(s.size()); } /* Read a C-style string from stream `str'. */ -static std::string parseString(std::istream & str) +static std::string parseString(StringViewStream & str) { std::string res; expect(str, "\""); @@ -187,7 +208,7 @@ static void validatePath(std::string_view s) { throw FormatError("bad path '%1%' in derivation", s); } -static Path parsePath(std::istream & str) +static Path parsePath(StringViewStream & str) { auto s = parseString(str); validatePath(s); @@ -195,7 +216,7 @@ static Path parsePath(std::istream & str) } -static bool endOfList(std::istream & str) +static bool endOfList(StringViewStream & str) { if (str.peek() == ',') { str.get(); @@ -209,7 +230,7 @@ static bool endOfList(std::istream & str) } -static StringSet parseStrings(std::istream & str, bool arePaths) +static StringSet parseStrings(StringViewStream & str, bool arePaths) { StringSet res; expect(str, "["); @@ -267,7 +288,7 @@ static DerivationOutput parseDerivationOutput( } static DerivationOutput parseDerivationOutput( - const StoreDirConfig & store, std::istringstream & str, + const StoreDirConfig & store, StringViewStream & str, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings) { expect(str, ","); const auto pathS = parseString(str); @@ -297,7 +318,7 @@ enum struct DerivationATermVersion { static DerivedPathMap::ChildNode parseDerivedPathMapNode( const StoreDirConfig & store, - std::istringstream & str, + StringViewStream & str, DerivationATermVersion version) { DerivedPathMap::ChildNode node; @@ -349,7 +370,7 @@ Derivation parseDerivation( Derivation drv; drv.name = name; - std::istringstream str(std::move(s)); + StringViewStream str{s}; expect(str, "D"); DerivationATermVersion version; switch (str.peek()) { From 2cfc4ace35d1c8cca917c487be3cfddfcf3bba01 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 26 Dec 2023 17:40:55 +0100 Subject: [PATCH 2/5] malloc/memset even less MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit more buffers that can be uninitialized and on the stack. small difference, but still worth doing. before: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.963 s ± 0.011 s [User: 5.330 s, System: 1.421 s] Range (min … max): 6.943 s … 6.974 s 10 runs after: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.952 s ± 0.015 s [User: 5.294 s, System: 1.452 s] Range (min … max): 6.926 s … 6.974 s 10 runs --- src/libutil/archive.cc | 2 +- src/libutil/file-system.cc | 2 +- src/libutil/serialise.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 465df2073..712ea51c7 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -140,7 +140,7 @@ static void parseContents(ParseSink & sink, Source & source, const Path & path) sink.preallocateContents(size); uint64_t left = size; - std::vector buf(65536); + std::array buf; while (left) { checkInterrupt(); diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc index c96effff9..4cac35ace 100644 --- a/src/libutil/file-system.cc +++ b/src/libutil/file-system.cc @@ -307,7 +307,7 @@ void writeFile(const Path & path, Source & source, mode_t mode, bool sync) if (!fd) throw SysError("opening file '%1%'", path); - std::vector buf(64 * 1024); + std::array buf; try { while (true) { diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index f465bd0de..76b378e18 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -82,7 +82,7 @@ void Source::operator () (std::string_view data) void Source::drainInto(Sink & sink) { std::string s; - std::vector buf(8192); + std::array buf; while (true) { size_t n; try { From 79d3d412cacd210bc9a0e9ba5407eea67c8e3868 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 26 Dec 2023 22:18:42 +0100 Subject: [PATCH 3/5] optimize derivation string parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit a bunch of derivation strings contain no escape sequences. we can optimize for this fact by first scanning for the end of a derivation string and simply returning the contents unmodified if no escape sequences were found. to make this even more efficient we can also use BackedStringViews to avoid copies, avoiding heap allocations for transient data. before: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.952 s ± 0.015 s [User: 5.294 s, System: 1.452 s] Range (min … max): 6.926 s … 6.974 s 10 runs after: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.907 s ± 0.012 s [User: 5.272 s, System: 1.429 s] Range (min … max): 6.893 s … 6.926 s 10 runs --- doc/manual/rl-next/drv-string-parse-hang.md | 6 ++ src/libstore/derivations.cc | 65 +++++++++++++-------- 2 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 doc/manual/rl-next/drv-string-parse-hang.md diff --git a/doc/manual/rl-next/drv-string-parse-hang.md b/doc/manual/rl-next/drv-string-parse-hang.md new file mode 100644 index 000000000..1e041d3e9 --- /dev/null +++ b/doc/manual/rl-next/drv-string-parse-hang.md @@ -0,0 +1,6 @@ +--- +synopsis: Fix handling of truncated `.drv` files. +prs: 9673 +--- + +Previously a `.drv` that was truncated in the middle of a string would case nix to enter an infinite loop, eventually exhausting all memory and crashing. diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 973ce5211..89d902917 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -2,6 +2,7 @@ #include "downstream-placeholder.hh" #include "store-api.hh" #include "globals.hh" +#include "types.hh" #include "util.hh" #include "split.hh" #include "common-protocol.hh" @@ -186,20 +187,38 @@ static void expect(StringViewStream & str, std::string_view s) /* Read a C-style string from stream `str'. */ -static std::string parseString(StringViewStream & str) +static BackedStringView parseString(StringViewStream & str) { - std::string res; expect(str, "\""); - int c; - while ((c = str.get()) != '"') - if (c == '\\') { - c = str.get(); - if (c == 'n') res += '\n'; - else if (c == 'r') res += '\r'; - else if (c == 't') res += '\t'; - else res += c; + auto c = str.remaining.begin(), end = str.remaining.end(); + bool escaped = false; + for (; c != end && *c != '"'; c++) { + if (*c == '\\') { + c++; + if (c == end) + throw FormatError("unterminated string in derivation"); + escaped = true; } - else res += c; + } + + const auto contentLen = c - str.remaining.begin(); + const auto content = str.remaining.substr(0, contentLen); + str.remaining.remove_prefix(contentLen + 1); + + if (!escaped) + return content; + + std::string res; + res.reserve(content.size()); + for (c = content.begin(), end = content.end(); c != end; c++) + if (*c == '\\') { + c++; + if (*c == 'n') res += '\n'; + else if (*c == 'r') res += '\r'; + else if (*c == 't') res += '\t'; + else res += *c; + } + else res += *c; return res; } @@ -210,7 +229,7 @@ static void validatePath(std::string_view s) { static Path parsePath(StringViewStream & str) { - auto s = parseString(str); + auto s = parseString(str).toOwned(); validatePath(s); return s; } @@ -235,7 +254,7 @@ static StringSet parseStrings(StringViewStream & str, bool arePaths) StringSet res; expect(str, "["); while (!endOfList(str)) - res.insert(arePaths ? parsePath(str) : parseString(str)); + res.insert(arePaths ? parsePath(str) : parseString(str).toOwned()); return res; } @@ -296,7 +315,7 @@ static DerivationOutput parseDerivationOutput( expect(str, ","); const auto hash = parseString(str); expect(str, ")"); - return parseDerivationOutput(store, pathS, hashAlgo, hash, xpSettings); + return parseDerivationOutput(store, *pathS, *hashAlgo, *hash, xpSettings); } /** @@ -344,7 +363,7 @@ static DerivedPathMap::ChildNode parseDerivedPathMapNode( expect(str, ",["); while (!endOfList(str)) { expect(str, "("); - auto outputName = parseString(str); + auto outputName = parseString(str).toOwned(); expect(str, ","); node.childMap.insert_or_assign(outputName, parseDerivedPathMapNode(store, str, version)); expect(str, ")"); @@ -381,12 +400,12 @@ Derivation parseDerivation( case 'r': { expect(str, "rvWithVersion("); auto versionS = parseString(str); - if (versionS == "xp-dyn-drv") { + if (*versionS == "xp-dyn-drv") { // Only verison we have so far version = DerivationATermVersion::DynamicDerivations; xpSettings.require(Xp::DynamicDerivations); } else { - throw FormatError("Unknown derivation ATerm format version '%s'", versionS); + throw FormatError("Unknown derivation ATerm format version '%s'", *versionS); } expect(str, ","); break; @@ -398,7 +417,7 @@ Derivation parseDerivation( /* Parse the list of outputs. */ expect(str, "["); while (!endOfList(str)) { - expect(str, "("); std::string id = parseString(str); + expect(str, "("); std::string id = parseString(str).toOwned(); auto output = parseDerivationOutput(store, str, xpSettings); drv.outputs.emplace(std::move(id), std::move(output)); } @@ -414,19 +433,19 @@ Derivation parseDerivation( } expect(str, ","); drv.inputSrcs = store.parseStorePathSet(parseStrings(str, true)); - expect(str, ","); drv.platform = parseString(str); - expect(str, ","); drv.builder = parseString(str); + expect(str, ","); drv.platform = parseString(str).toOwned(); + expect(str, ","); drv.builder = parseString(str).toOwned(); /* Parse the builder arguments. */ expect(str, ",["); while (!endOfList(str)) - drv.args.push_back(parseString(str)); + drv.args.push_back(parseString(str).toOwned()); /* Parse the environment variables. */ expect(str, ",["); while (!endOfList(str)) { - expect(str, "("); auto name = parseString(str); - expect(str, ","); auto value = parseString(str); + expect(str, "("); auto name = parseString(str).toOwned(); + expect(str, ","); auto value = parseString(str).toOwned(); expect(str, ")"); drv.env[name] = value; } From 02c64abf1e892220cb62ce3b7e1598030fb6a61c Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 26 Dec 2023 05:44:52 +0100 Subject: [PATCH 4/5] use translation table for drv string parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the table is very small compared to cache sizes and a single indexed load is faster than three comparisons. before: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.907 s ± 0.012 s [User: 5.272 s, System: 1.429 s] Range (min … max): 6.893 s … 6.926 s 10 runs after: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.883 s ± 0.016 s [User: 5.250 s, System: 1.424 s] Range (min … max): 6.860 s … 6.905 s 10 runs --- src/libstore/derivations.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 89d902917..89a345057 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -174,6 +174,17 @@ struct StringViewStream { return c; } }; + +constexpr struct Escapes { + char map[256]; + constexpr Escapes() { + for (int i = 0; i < 256; i++) map[i] = (char) (unsigned char) i; + map[(int) (unsigned char) 'n'] = '\n'; + map[(int) (unsigned char) 'r'] = '\r'; + map[(int) (unsigned char) 't'] = '\t'; + } + char operator[](char c) const { return map[(unsigned char) c]; } +} escapes; } @@ -213,10 +224,7 @@ static BackedStringView parseString(StringViewStream & str) for (c = content.begin(), end = content.end(); c != end; c++) if (*c == '\\') { c++; - if (*c == 'n') res += '\n'; - else if (*c == 'r') res += '\r'; - else if (*c == 't') res += '\t'; - else res += *c; + res += escapes[*c]; } else res += *c; return res; From c62686a95bd3ebbf3f5104c27222e751e84b84a3 Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 27 Dec 2023 04:26:50 +0100 Subject: [PATCH 5/5] reduce copies during drv parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit many paths need not be heap-allocated, and derivation env name/valye pairs can be moved into the map. before: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.883 s ± 0.016 s [User: 5.250 s, System: 1.424 s] Range (min … max): 6.860 s … 6.905 s 10 runs after: Benchmark 1: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 6.868 s ± 0.027 s [User: 5.194 s, System: 1.466 s] Range (min … max): 6.828 s … 6.913 s 10 runs --- src/libstore/derivations.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 89a345057..2fafcb8e7 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -235,10 +235,10 @@ static void validatePath(std::string_view s) { throw FormatError("bad path '%1%' in derivation", s); } -static Path parsePath(StringViewStream & str) +static BackedStringView parsePath(StringViewStream & str) { - auto s = parseString(str).toOwned(); - validatePath(s); + auto s = parseString(str); + validatePath(*s); return s; } @@ -262,7 +262,7 @@ static StringSet parseStrings(StringViewStream & str, bool arePaths) StringSet res; expect(str, "["); while (!endOfList(str)) - res.insert(arePaths ? parsePath(str) : parseString(str).toOwned()); + res.insert((arePaths ? parsePath(str) : parseString(str)).toOwned()); return res; } @@ -434,9 +434,9 @@ Derivation parseDerivation( expect(str, ",["); while (!endOfList(str)) { expect(str, "("); - Path drvPath = parsePath(str); + auto drvPath = parsePath(str); expect(str, ","); - drv.inputDrvs.map.insert_or_assign(store.parseStorePath(drvPath), parseDerivedPathMapNode(store, str, version)); + drv.inputDrvs.map.insert_or_assign(store.parseStorePath(*drvPath), parseDerivedPathMapNode(store, str, version)); expect(str, ")"); } @@ -455,7 +455,7 @@ Derivation parseDerivation( expect(str, "("); auto name = parseString(str).toOwned(); expect(str, ","); auto value = parseString(str).toOwned(); expect(str, ")"); - drv.env[name] = value; + drv.env.insert_or_assign(std::move(name), std::move(value)); } expect(str, ")");