From fb05a6adcfe0362249bd16527a2e44ea2611e5f3 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 16 Jun 2020 12:45:36 -0400 Subject: [PATCH 01/27] Eliminate old TeeSink abstraction This was introduced in fa125b9b28bea25a4eeb4d39a71a481563127cb9, and then "reverted" in 1cf480110879ffc8aee94b4b75999da405b71d7c, except that revert left the struct around doing nothing useful. We're removing it all the way now because we want to make a new `TeeSink` complementing the already-exiting `TeeSource`, that is actually a completely different concept as far as the class hierarchy is concerned. --- src/libstore/daemon.cc | 7 ++++--- src/libstore/export-import.cc | 11 ++++++----- src/libutil/archive.hh | 7 ------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index e370e278c..b2e37c5b5 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -721,9 +721,10 @@ static void performOp(TunnelLogger * logger, ref store, if (GET_PROTOCOL_MINOR(clientVersion) >= 21) source = std::make_unique(from, to); else { - TeeSink tee(from); - parseDump(tee, tee.source); - saved = std::move(*tee.source.data); + TeeSource tee(from); + ParseSink sink; + parseDump(sink, tee); + saved = std::move(*tee.data); source = std::make_unique(saved); } diff --git a/src/libstore/export-import.cc b/src/libstore/export-import.cc index cb9da027d..0e33fb687 100644 --- a/src/libstore/export-import.cc +++ b/src/libstore/export-import.cc @@ -77,8 +77,9 @@ StorePaths Store::importPaths(Source & source, std::shared_ptr acces if (n != 1) throw Error("input doesn't look like something created by 'nix-store --export'"); /* Extract the NAR from the source. */ - TeeSink tee(source); - parseDump(tee, tee.source); + TeeSource tee(source); + ParseSink sink; + parseDump(sink, tee); uint32_t magic = readInt(source); if (magic != exportMagic) @@ -94,15 +95,15 @@ StorePaths Store::importPaths(Source & source, std::shared_ptr acces if (deriver != "") info.deriver = parseStorePath(deriver); - info.narHash = hashString(htSHA256, *tee.source.data); - info.narSize = tee.source.data->size(); + info.narHash = hashString(htSHA256, *tee.data); + info.narSize = tee.data->size(); // Ignore optional legacy signature. if (readInt(source) == 1) readString(source); // Can't use underlying source, which would have been exhausted - auto source = StringSource { *tee.source.data }; + auto source = StringSource { *tee.data }; addToStore(info, source, NoRepair, checkSigs, accessor); res.push_back(info.path); diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index 768fe2536..32d98a610 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -63,13 +63,6 @@ struct ParseSink virtual void createSymlink(const Path & path, const string & target) { }; }; -struct TeeSink : ParseSink -{ - TeeSource source; - - TeeSink(Source & source) : source(source) { } -}; - void parseDump(ParseSink & sink, Source & source); void restorePath(const Path & path, Source & source); From 289b9b8dcf8dc651c2d245d9328911f7addd1626 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 16 Jun 2020 15:14:11 -0400 Subject: [PATCH 02/27] Create a new TeeSink abstraction This is a bit complex because we want to expose extra functionality the wrapped class has. Perhaps there is some inheritancy trickery to do this nicer, but I don't know it, and this is the first thing we tried after a series of attempts that did build. This design is kind of like that of Rust's Writer, Reader, or Iter adapters, which impliment more traits based on what the inner type implements. --- src/libutil/compression.hh | 12 ++++++++++++ src/libutil/serialise.hh | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/libutil/compression.hh b/src/libutil/compression.hh index dd666a4e1..1bd118b47 100644 --- a/src/libutil/compression.hh +++ b/src/libutil/compression.hh @@ -25,4 +25,16 @@ MakeError(UnknownCompressionMethod, Error); MakeError(CompressionError, Error); +template<> +struct TeeSink> : CompressionSink +{ + MAKE_TEE_SINK(ref); + void finish() override { + orig->finish(); + } + void write(const unsigned char * data, size_t len) override { + return orig->write(data, len); + } +}; + } diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index a04118512..88a6b7ffe 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -181,6 +181,28 @@ struct TeeSource : Source } }; +#define MAKE_TEE_SINK(T) \ + T orig; \ + ref data; \ + TeeSink(T && orig) \ + : orig(std::move(orig)), data(make_ref()) { } \ + void operator () (const unsigned char * data, size_t len) { \ + this->data->append((const char *) data, len); \ + (*this->orig)(data, len); \ + } \ + void operator () (const std::string & s) \ + { \ + *data += s; \ + (*this->orig)(s); \ + } + +template +struct TeeSink : Sink +{ + MAKE_TEE_SINK(T); +}; + + /* A reader that consumes the original Source until 'size'. */ struct SizedSource : Source { From a835c740ca67e063fe47e7c31444b7c1cac0fe81 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 16 Jun 2020 15:14:11 -0400 Subject: [PATCH 03/27] Replace `TransferItem::status` with a local variable Everywhere seems to use `getHTTPStatus` now. --- src/libstore/filetransfer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 531b85af8..9566e0ae9 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -56,7 +56,6 @@ struct curlFileTransfer : public FileTransfer Callback callback; CURL * req = 0; bool active = false; // whether the handle has been added to the multi object - std::string status; unsigned int attempt = 0; @@ -175,6 +174,7 @@ struct curlFileTransfer : public FileTransfer size_t realSize = size * nmemb; std::string line((char *) contents, realSize); printMsg(lvlVomit, format("got header for '%s': %s") % request.uri % trim(line)); + std::string status; if (line.compare(0, 5, "HTTP/") == 0) { // new response starts result.etag = ""; auto ss = tokenizeString>(line, " "); From 004570a377b2df355064d48afc54375690c3cdb0 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 16 Jun 2020 15:14:11 -0400 Subject: [PATCH 04/27] Add HTTP responses to FileTransferErrors --- src/libstore/filetransfer.cc | 26 ++++++++++++++++++++------ src/libstore/filetransfer.hh | 5 +++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 9566e0ae9..d89f8388f 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -122,7 +122,7 @@ struct curlFileTransfer : public FileTransfer if (requestHeaders) curl_slist_free_all(requestHeaders); try { if (!done) - fail(FileTransferError(Interrupted, "download of '%s' was interrupted", request.uri)); + fail(FileTransferError(Interrupted, nullptr, "download of '%s' was interrupted", request.uri)); } catch (...) { ignoreException(); } @@ -152,8 +152,18 @@ struct curlFileTransfer : public FileTransfer size_t realSize = size * nmemb; result.bodySize += realSize; - if (!decompressionSink) + if (!decompressionSink) { decompressionSink = makeDecompressionSink(encoding, finalSink); + if (! successfulStatuses.count(getHTTPStatus())) { + // In this case we want to construct a TeeSink, to keep + // the response around (which we figure won't be big + // like an actual download should be) to improve error + // messages. + decompressionSink = std::make_shared>>( + ref{ decompressionSink } + ); + } + } (*decompressionSink)((unsigned char *) contents, realSize); @@ -408,16 +418,20 @@ struct curlFileTransfer : public FileTransfer attempt++; + std::shared_ptr response; + if (decompressionSink) + if (auto teeSink = std::dynamic_pointer_cast>>(decompressionSink)) + response = teeSink->data; auto exc = code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted - ? FileTransferError(Interrupted, fmt("%s of '%s' was interrupted", request.verb(), request.uri)) + ? FileTransferError(Interrupted, response, fmt("%s of '%s' was interrupted", request.verb(), request.uri)) : httpStatus != 0 - ? FileTransferError(err, + ? FileTransferError(err, response, fmt("unable to %s '%s': HTTP error %d", request.verb(), request.uri, httpStatus) + (code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) ) - : FileTransferError(err, + : FileTransferError(err, response, fmt("unable to %s '%s': %s (%d)", request.verb(), request.uri, curl_easy_strerror(code), code)); @@ -675,7 +689,7 @@ struct curlFileTransfer : public FileTransfer auto s3Res = s3Helper.getObject(bucketName, key); FileTransferResult res; if (!s3Res.data) - throw FileTransferError(NotFound, fmt("S3 object '%s' does not exist", request.uri)); + throw FileTransferError(NotFound, nullptr, fmt("S3 object '%s' does not exist", request.uri)); res.data = s3Res.data; callback(std::move(res)); #else diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 11dca2fe0..8e31a9e42 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -103,9 +103,10 @@ class FileTransferError : public Error { public: FileTransfer::Error error; + std::shared_ptr response; // intentionally optional template - FileTransferError(FileTransfer::Error error, const Args & ... args) - : Error(args...), error(error) + FileTransferError(FileTransfer::Error error, std::shared_ptr response, const Args & ... args) + : Error(args...), error(error), response(response) { } }; From 74b219ef6e5e171c56c8ad7385969e0d0df09ed8 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 18 Jun 2020 14:48:45 +0000 Subject: [PATCH 05/27] Adjust FileTransferError message to use opt response --- src/libstore/filetransfer.cc | 12 ++++++++++++ src/libstore/filetransfer.hh | 7 ++++--- src/libutil/error.hh | 3 +-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index d89f8388f..fa8ad33f5 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -834,6 +834,18 @@ void FileTransfer::download(FileTransferRequest && request, Sink & sink) } } +template +FileTransferError::FileTransferError(FileTransfer::Error error, std::shared_ptr response, const Args & ... args) + : Error(args...), error(error), response(response) +{ + const auto hf = hintfmt(args...); + if (response) { + err.hint = hintfmt("%1%\n\nresponse body:\n\n%2%", normaltxt(hf.str()), response); + } else { + err.hint = hf; + } +} + bool isUri(const string & s) { if (s.compare(0, 8, "channel:") == 0) return true; diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 8e31a9e42..25ade0add 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -104,10 +104,11 @@ class FileTransferError : public Error public: FileTransfer::Error error; std::shared_ptr response; // intentionally optional + template - FileTransferError(FileTransfer::Error error, std::shared_ptr response, const Args & ... args) - : Error(args...), error(error), response(response) - { } + FileTransferError(FileTransfer::Error error, std::shared_ptr response, const Args & ... args); + + virtual const char* sname() const override { return "FileTransferError"; } }; bool isUri(const string & s); diff --git a/src/libutil/error.hh b/src/libutil/error.hh index 1e6102ce1..ac9d2e494 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -173,9 +173,8 @@ public: template SysError(const Args & ... args) - :Error("") + : Error(""), errNo(errno) { - errNo = errno; auto hf = hintfmt(args...); err.hint = hintfmt("%1%: %2%", normaltxt(hf.str()), strerror(errNo)); } From 639e20dc3ed9c5b28138285653912de78fe0507f Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 18 Jun 2020 17:54:16 +0000 Subject: [PATCH 06/27] Prevent '%' in URL from causing crashes We have a larger problem that passsing computed strings to the first variable argument of many exception constructors is unsafe because that first variable argument is interpreted not as a plain string, but format string, and if it contains '%' boost::format will abort, since there are no arguments to the format string. In this particular instance '%' was used as part of an escape code in a URL, which, when the download failed, caused Nix to abort displaying the `FileTransferError`. --- src/libstore/filetransfer.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 531b85af8..e795da860 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -410,16 +410,13 @@ struct curlFileTransfer : public FileTransfer auto exc = code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted - ? FileTransferError(Interrupted, fmt("%s of '%s' was interrupted", request.verb(), request.uri)) + ? FileTransferError(Interrupted, "%s of '%s' was interrupted", request.verb(), request.uri) : httpStatus != 0 - ? FileTransferError(err, - fmt("unable to %s '%s': HTTP error %d", - request.verb(), request.uri, httpStatus) - + (code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) - ) - : FileTransferError(err, - fmt("unable to %s '%s': %s (%d)", - request.verb(), request.uri, curl_easy_strerror(code), code)); + ? FileTransferError(err, "unable to %s '%s': HTTP error %d%s", + request.verb(), request.uri, httpStatus, + code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) + : FileTransferError(err, "unable to %s '%s': %s (%d)", + request.verb(), request.uri, curl_easy_strerror(code), code); /* If this is a transient error, then maybe retry the download after a while. If we're writing to a @@ -675,7 +672,7 @@ struct curlFileTransfer : public FileTransfer auto s3Res = s3Helper.getObject(bucketName, key); FileTransferResult res; if (!s3Res.data) - throw FileTransferError(NotFound, fmt("S3 object '%s' does not exist", request.uri)); + throw FileTransferError(NotFound, "S3 object '%s' does not exist", request.uri); res.data = s3Res.data; callback(std::move(res)); #else From 1b23fe4afb8ed2c41604a1ed19cf3d49c34f46d1 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 18 Jun 2020 19:03:10 +0000 Subject: [PATCH 07/27] Fix bugs - Bad dynamic cast target ...classic - std::shared_ptr need explicit deref --- src/libstore/filetransfer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 9ae682bbb..8b66cbdad 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -420,7 +420,7 @@ struct curlFileTransfer : public FileTransfer std::shared_ptr response; if (decompressionSink) - if (auto teeSink = std::dynamic_pointer_cast>>(decompressionSink)) + if (auto teeSink = std::dynamic_pointer_cast>>(decompressionSink)) response = teeSink->data; auto exc = code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted @@ -837,7 +837,7 @@ FileTransferError::FileTransferError(FileTransfer::Error error, std::shared_ptr< { const auto hf = hintfmt(args...); if (response) { - err.hint = hintfmt("%1%\n\nresponse body:\n\n%2%", normaltxt(hf.str()), response); + err.hint = hintfmt("%1%\n\nresponse body:\n\n%2%", normaltxt(hf.str()), *response); } else { err.hint = hf; } From e197bc622948973fca192774b6cd8e0d3157aeb6 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 23 Jun 2020 11:12:01 -0400 Subject: [PATCH 08/27] Enable the --store option to take relative paths In nix commands which accept --store options, we can now specify a relative path, which will be canonicalized. --- src/libstore/store-api.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index e4a4ae11e..42ffa3ddb 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -864,7 +864,7 @@ StoreType getStoreType(const std::string & uri, const std::string & stateDir) { if (uri == "daemon") { return tDaemon; - } else if (uri == "local" || hasPrefix(uri, "/")) { + } else if (uri == "local" || hasPrefix(uri, "/") || hasPrefix(uri, "./")) { return tLocal; } else if (uri == "" || uri == "auto") { if (access(stateDir.c_str(), R_OK | W_OK) == 0) @@ -888,8 +888,11 @@ static RegisterStoreImplementation regStore([]( return std::shared_ptr(std::make_shared(params)); case tLocal: { Store::Params params2 = params; - if (hasPrefix(uri, "/")) + if (hasPrefix(uri, "/")) { params2["root"] = uri; + } else if (hasPrefix(uri, "./")) { + params2["root"] = absPath(uri); + } return std::shared_ptr(std::make_shared(params2)); } default: From 9ec10046e04a509cc982102f587cf06840f02327 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 16:06:24 +0000 Subject: [PATCH 09/27] Narrow scope of temporary value --- src/libstore/daemon.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index db7139374..6e0b290ed 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -375,21 +375,24 @@ static void performOp(TunnelLogger * logger, ref store, } case wopAddToStore: { - std::string s, baseName; + HashType hashAlgo; + std::string baseName; FileIngestionMethod method; { - bool fixed; uint8_t recursive; - from >> baseName >> fixed /* obsolete */ >> recursive >> s; + bool fixed; + uint8_t recursive; + std::string hashAlgoRaw; + from >> baseName >> fixed /* obsolete */ >> recursive >> hashAlgoRaw; if (recursive > (uint8_t) FileIngestionMethod::Recursive) throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive); method = FileIngestionMethod { recursive }; /* Compatibility hack. */ if (!fixed) { - s = "sha256"; + hashAlgoRaw = "sha256"; method = FileIngestionMethod::Recursive; } + hashAlgo = parseHashType(hashAlgoRaw); } - HashType hashAlgo = parseHashType(s); StringSink savedNAR; TeeSource savedNARSource(from, savedNAR); From c86fc3a9657096b74fe967f2f0bbd120e46908f6 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 15:55:04 +0000 Subject: [PATCH 10/27] Crudely make `addToStoreFromDump` take `Source` not string I just as little beyond the type as possible, so the implementation changes this enables can be reviewed separately. --- src/libstore/build.cc | 2 +- src/libstore/daemon.cc | 5 ++++- src/libstore/local-store.cc | 5 ++++- src/libstore/local-store.hh | 2 +- src/libstore/store-api.hh | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index ac2e67574..62294a08c 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2774,7 +2774,7 @@ struct RestrictedStore : public LocalFSStore goal.addDependency(info.path); } - StorePath addToStoreFromDump(const string & dump, const string & name, + StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override { auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair); diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 6e0b290ed..69d7ef511 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -410,8 +410,11 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); if (!savedRegular.regular) throw Error("regular file expected"); + StringSource dumpSource { + method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s + }; auto path = store->addToStoreFromDump( - method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s, + dumpSource, baseName, method, hashAlgo); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 26b226fe8..603f36352 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,9 +1033,12 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(const string & dump, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { + // FIXME: See if we can use the original source to reduce memory usage. + auto dump = dumpSource.drain(); + Hash h = hashString(hashAlgo, dump); auto dstPath = makeFixedOutputPath(method, h, name); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index c0e5d0286..355c2814f 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -153,7 +153,7 @@ public: in `dump', which is either a NAR serialisation (if recursive == true) or simply the contents of a regular file (if recursive == false). */ - StorePath addToStoreFromDump(const string & dump, const string & name, + StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override; StorePath addTextToStore(const string & name, const string & s, diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index a4be0411e..d1cb2035f 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -460,7 +460,7 @@ public: std::optional expectedCAHash = {}); // FIXME: remove? - virtual StorePath addToStoreFromDump(const string & dump, const string & name, + virtual StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) { throw Error("addToStoreFromDump() is not supported by this store"); From 9de96ef7d409fedea092045c4dbae7177f88962a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 19:03:39 +0000 Subject: [PATCH 11/27] Dedup `LocalStore::addToStore*` The downsides is that the coroutine has byte-by-byte loop transfer. Will fix that next. --- src/libstore/local-store.cc | 83 ++++++++++--------------------------- src/libstore/local-store.hh | 4 ++ 2 files changed, 25 insertions(+), 62 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 603f36352..925ac25bf 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,65 +1033,16 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { - // FIXME: See if we can use the original source to reduce memory usage. - auto dump = dumpSource.drain(); - - Hash h = hashString(hashAlgo, dump); - - auto dstPath = makeFixedOutputPath(method, h, name); - - addTempRoot(dstPath); - - if (repair || !isValidPath(dstPath)) { - - /* The first check above is an optimisation to prevent - unnecessary lock acquisition. */ - - auto realPath = Store::toRealPath(dstPath); - - PathLocks outputLock({realPath}); - - if (repair || !isValidPath(dstPath)) { - - deletePath(realPath); - - autoGC(); - - if (method == FileIngestionMethod::Recursive) { - StringSource source(dump); - restorePath(realPath, source); - } else - writeFile(realPath, dump); - - canonicalisePathMetaData(realPath, -1); - - /* Register the SHA-256 hash of the NAR serialisation of - the path in the database. We may just have computed it - above (if called with recursive == true and hashAlgo == - sha256); otherwise, compute it here. */ - HashResult hash; - if (method == FileIngestionMethod::Recursive) { - hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump); - hash.second = dump.size(); - } else - hash = hashPath(htSHA256, realPath); - - optimisePath(realPath); // FIXME: combine with hashPath() - - ValidPathInfo info(dstPath); - info.narHash = hash.first; - info.narSize = hash.second; - info.ca = FixedOutputHash { .method = method, .hash = h }; - registerValidPath(info); + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + while (1) { + uint8_t buf[1]; + auto n = dump.read(buf, 1); + sink(buf, n); } - - outputLock.setDeletion(true); - } - - return dstPath; + }); } @@ -1100,6 +1051,19 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + if (method == FileIngestionMethod::Recursive) + dumpPath(srcPath, sink, filter); + else + readFile(srcPath, sink); + }); +} + + +StorePath LocalStore::addToStoreCommon( + const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, + std::function demux) +{ /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); @@ -1120,7 +1084,6 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, std::string nar; auto source = sinkToSource([&](Sink & sink) { - LambdaSink sink2([&](const unsigned char * buf, size_t len) { (*sha256Sink)(buf, len); if (hashSink) (*hashSink)(buf, len); @@ -1138,11 +1101,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (!inMemory) sink(buf, len); }); - - if (method == FileIngestionMethod::Recursive) - dumpPath(srcPath, sink2, filter); - else - readFile(srcPath, sink2); + demux(sink2); }); std::unique_ptr delTempDir; diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 355c2814f..215731f87 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -290,6 +290,10 @@ private: specified by the ‘secret-key-files’ option. */ void signPathInfo(ValidPathInfo & info); + StorePath addToStoreCommon( + const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, + std::function demux); + Path getRealStoreDir() override { return realStoreDir; } void createUser(const std::string & userName, uid_t userId) override; From 592851fb67cd15807109d6f65fb81f6af89af966 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 23:40:49 +0000 Subject: [PATCH 12/27] LocalStore::addToStoreFromDump copy in chunks Rather than copying byte-by-byte, we let the coroutine know how much data we would like it to send back to us. --- src/libstore/local-store.cc | 16 +++++++++------- src/libstore/local-store.hh | 2 +- src/libutil/serialise.cc | 33 ++++++++++++++++++++------------- src/libutil/serialise.hh | 11 ++++++++++- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 925ac25bf..dac7a50c4 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1036,11 +1036,13 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) { while (1) { - uint8_t buf[1]; - auto n = dump.read(buf, 1); + constexpr size_t bufSize = 1024; + uint8_t buf[bufSize]; + auto n = dump.read(buf, std::min(wanted, bufSize)); sink(buf, n); + // when control is yielded back to us wanted will be updated. } }); } @@ -1051,7 +1053,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else @@ -1062,7 +1064,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, StorePath LocalStore::addToStoreCommon( const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux) + std::function demux) { /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); @@ -1083,7 +1085,7 @@ StorePath LocalStore::addToStoreCommon( bool inMemory = true; std::string nar; - auto source = sinkToSource([&](Sink & sink) { + auto source = sinkToSource([&](Sink & sink, size_t & wanted) { LambdaSink sink2([&](const unsigned char * buf, size_t len) { (*sha256Sink)(buf, len); if (hashSink) (*hashSink)(buf, len); @@ -1101,7 +1103,7 @@ StorePath LocalStore::addToStoreCommon( if (!inMemory) sink(buf, len); }); - demux(sink2); + demux(sink2, wanted); }); std::unique_ptr delTempDir; diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 215731f87..ae23004c4 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -292,7 +292,7 @@ private: StorePath addToStoreCommon( const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux); + std::function demux); Path getRealStoreDir() override { return realStoreDir; } diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index c8b71188f..141e9e976 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -165,35 +165,43 @@ size_t StringSource::read(unsigned char * data, size_t len) #endif std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof) { struct SinkToSource : Source { - typedef boost::coroutines2::coroutine coro_t; + typedef boost::coroutines2::coroutine> coro_t; - std::function fun; + std::function fun; std::function eof; std::optional coro; bool started = false; - SinkToSource(std::function fun, std::function eof) + /* It would be nicer to have the co-routines have both args and a + return value, but unfortunately that was removed from Boost's + implementation for some reason, so we use some extra state instead. + */ + size_t wanted = 0; + + SinkToSource(std::function fun, std::function eof) : fun(fun), eof(eof) { } - std::string cur; + std::basic_string cur; size_t pos = 0; size_t read(unsigned char * data, size_t len) override { - if (!coro) + wanted = len < cur.size() ? 0 : len - cur.size(); + if (!coro) { coro = coro_t::pull_type([&](coro_t::push_type & yield) { - LambdaSink sink([&](const unsigned char * data, size_t len) { - if (len) yield(std::string((const char *) data, len)); + LambdaSink sink([&](const uint8_t * data, size_t len) { + if (len) yield(std::basic_string { data, len }); }); - fun(sink); + fun(sink, wanted); }); + } if (!*coro) { eof(); abort(); } @@ -203,11 +211,10 @@ std::unique_ptr sinkToSource( pos = 0; } - auto n = std::min(cur.size() - pos, len); - memcpy(data, (unsigned char *) cur.data() + pos, n); - pos += n; + auto numCopied = cur.copy(data, len, pos); + pos += numCopied; - return n; + return numCopied; } }; diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 8386a4991..6cb9d1bf5 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -260,11 +260,20 @@ struct LambdaSource : Source /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof = []() { throw EndOfFile("coroutine has finished"); }); +static inline std::unique_ptr sinkToSource( + std::function fun, + std::function eof = []() { + throw EndOfFile("coroutine has finished"); + }) +{ + return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); +} + void writePadding(size_t len, Sink & sink); void writeString(const unsigned char * buf, size_t len, Sink & sink); From 8173e7bfefc6a5771b2c9ec48bd6edd3b161dd90 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 14 Jul 2020 21:11:08 +0000 Subject: [PATCH 13/27] Fix localhost::addToStore(...Path...) We were calculating the nar hash wrong when the file ingestion method was flat. I don't think there's anything we can do in that case but dump the file again, so that's what I do. As an optomization, we again could reuse the original dump for just the recursive and non-sha256 case, but I rather do that after this fix, and after my other PRs which deduplicate this code. --- src/libstore/local-store.cc | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 26b226fe8..5827dfc58 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1097,15 +1097,8 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - /* For computing the NAR hash. */ - auto sha256Sink = std::make_unique(htSHA256); - - /* For computing the store path. In recursive SHA-256 mode, this - is the same as the NAR hash, so no need to do it again. */ - std::unique_ptr hashSink = - method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 - ? nullptr - : std::make_unique(hashAlgo); + /* For computing the store path. */ + auto hashSink = std::make_unique(hashAlgo); /* Read the source path into memory, but only if it's up to narBufferSize bytes. If it's larger, write it to a temporary @@ -1114,13 +1107,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, temporary path. Otherwise, we move it to the destination store path. */ bool inMemory = true; - std::string nar; + std::string nar; // TODO rename from "nar" to "dump" auto source = sinkToSource([&](Sink & sink) { LambdaSink sink2([&](const unsigned char * buf, size_t len) { - (*sha256Sink)(buf, len); - if (hashSink) (*hashSink)(buf, len); + (*hashSink)(buf, len); if (inMemory) { if (nar.size() + len > settings.narBufferSize) { @@ -1165,9 +1157,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, /* The NAR fits in memory, so we didn't do restorePath(). */ } - auto sha256 = sha256Sink->finish(); - - Hash hash = hashSink ? hashSink->finish().first : sha256.first; + auto [hash, size] = hashSink->finish(); auto dstPath = makeFixedOutputPath(method, hash, name); @@ -1201,13 +1191,22 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, throw Error("renaming '%s' to '%s'", tempPath, realPath); } + /* For computing the nar hash. In recursive SHA-256 mode, this + is the same as the store hash, so no need to do it again. */ + auto narHash = std::pair { hash, size }; + if (method != FileIngestionMethod::Recursive || hashAlgo != htSHA256) { + HashSink narSink { htSHA256 }; + dumpPath(realPath, narSink); + narHash = narSink.finish(); + } + canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath optimisePath(realPath); ValidPathInfo info(dstPath); - info.narHash = sha256.first; - info.narSize = sha256.second; + info.narHash = narHash.first; + info.narSize = narHash.second; info.ca = FixedOutputHash { .method = method, .hash = hash }; registerValidPath(info); } From 650c2c655810c375296b52997e2f85298c7c566a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 14 Jul 2020 21:28:50 +0000 Subject: [PATCH 14/27] Rename variable `nar` -> `dump` according to TODO --- src/libstore/local-store.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 5827dfc58..cd92f138c 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1107,7 +1107,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, temporary path. Otherwise, we move it to the destination store path. */ bool inMemory = true; - std::string nar; // TODO rename from "nar" to "dump" + std::string dump; auto source = sinkToSource([&](Sink & sink) { @@ -1115,13 +1115,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, (*hashSink)(buf, len); if (inMemory) { - if (nar.size() + len > settings.narBufferSize) { + if (dump.size() + len > settings.narBufferSize) { inMemory = false; sink << 1; - sink((const unsigned char *) nar.data(), nar.size()); - nar.clear(); + sink((const unsigned char *) dump.data(), dump.size()); + dump.clear(); } else { - nar.append((const char *) buf, len); + dump.append((const char *) buf, len); } } @@ -1180,7 +1180,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (inMemory) { /* Restore from the NAR in memory. */ - StringSource source(nar); + StringSource source(dump); if (method == FileIngestionMethod::Recursive) restorePath(realPath, source); else From d087cf48552ee82e2bc78bb6c99854bab350ee00 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 21:10:33 +0000 Subject: [PATCH 15/27] Revert "Revert "LocalStore::addToStore(srcPath): Handle the flat case"" This reverts commit cff2157185912025c24a1b9dc99056161634176c. --- src/libstore/local-store.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b3f4b3f7d..26b226fe8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1097,16 +1097,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - if (method != FileIngestionMethod::Recursive) - return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair); - /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); /* For computing the store path. In recursive SHA-256 mode, this is the same as the NAR hash, so no need to do it again. */ std::unique_ptr hashSink = - hashAlgo == htSHA256 + method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 ? nullptr : std::make_unique(hashAlgo); @@ -1139,7 +1136,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (!inMemory) sink(buf, len); }); - dumpPath(srcPath, sink2, filter); + if (method == FileIngestionMethod::Recursive) + dumpPath(srcPath, sink2, filter); + else + readFile(srcPath, sink2); }); std::unique_ptr delTempDir; @@ -1155,7 +1155,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - restorePath(tempPath, *source); + if (method == FileIngestionMethod::Recursive) + restorePath(tempPath, *source); + else + writeFile(tempPath, *source); } catch (EndOfFile &) { if (!inMemory) throw; @@ -1188,7 +1191,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (inMemory) { /* Restore from the NAR in memory. */ StringSource source(nar); - restorePath(realPath, source); + if (method == FileIngestionMethod::Recursive) + restorePath(realPath, source); + else + writeFile(realPath, source); } else { /* Move the temporary path we restored above. */ if (rename(tempPath.c_str(), realPath.c_str())) From bc109648c41f8021707b55b815e68a890a09f2f6 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 23:14:30 +0000 Subject: [PATCH 16/27] Get rid of `LocalStore::addToStoreCommon` I got it to just become `LocalStore::addToStoreFromDump`, cleanly taking a store and then doing nothing too fancy with it. `LocalStore::addToStore(...Path...)` is now just a simple wrapper with a bare-bones sinkToSource of the right dump command. --- src/libstore/local-store.cc | 93 ++++++++++++++++--------------------- src/libstore/local-store.hh | 4 -- src/libutil/serialise.cc | 13 ++++++ src/libutil/serialise.hh | 15 +++++- 4 files changed, 67 insertions(+), 58 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b9fae6089..07e1679da 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,38 +1033,22 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, - FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) -{ - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) { - while (1) { - constexpr size_t bufSize = 1024; - uint8_t buf[bufSize]; - auto n = dump.read(buf, std::min(wanted, bufSize)); - sink(buf, n); - // when control is yielded back to us wanted will be updated. - } - }); -} - - StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair) { Path srcPath(absPath(_srcPath)); - - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) { + auto source = sinkToSource([&](Sink & sink, size_t & wanted) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else readFile(srcPath, sink); }); + return addToStoreFromDump(*source, name, method, hashAlgo, repair); } -StorePath LocalStore::addToStoreCommon( - const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux) +StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, + FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { /* For computing the store path. */ auto hashSink = std::make_unique(hashAlgo); @@ -1075,50 +1059,53 @@ StorePath LocalStore::addToStoreCommon( destination store path is already valid, we just delete the temporary path. Otherwise, we move it to the destination store path. */ - bool inMemory = true; + bool inMemory = false; + std::string dump; - auto source = sinkToSource([&](Sink & sink, size_t & wanted) { - LambdaSink sink2([&](const unsigned char * buf, size_t len) { - (*hashSink)(buf, len); - - if (inMemory) { - if (dump.size() + len > settings.narBufferSize) { - inMemory = false; - sink << 1; - sink((const unsigned char *) dump.data(), dump.size()); - dump.clear(); - } else { - dump.append((const char *) buf, len); - } - } - - if (!inMemory) sink(buf, len); - }); - demux(sink2, wanted); - }); + /* Fill out buffer, and decide whether we are working strictly in + memory based on whether we break out because the buffer is full + or the original source is empty */ + while (dump.size() < settings.narBufferSize) { + auto oldSize = dump.size(); + constexpr size_t chunkSize = 1024; + auto want = std::min(chunkSize, settings.narBufferSize - oldSize); + dump.resize(oldSize + want); + auto got = 0; + try { + got = source.read((uint8_t *) dump.data() + oldSize, want); + } catch (EndOfFile &) { + inMemory = true; + break; + } + /* Start hashing as we get data */ + (*hashSink)((const uint8_t *) dump.data() + oldSize, got); + dump.resize(oldSize + got); + } std::unique_ptr delTempDir; Path tempPath; - try { - /* Wait for the source coroutine to give us some dummy - data. This is so that we don't create the temporary - directory if the NAR fits in memory. */ - readInt(*source); + if (!inMemory) { + StringSource dumpSource { dump }; + TeeSource rest { source, *hashSink }; + ChainSource bothSource { + .source1 = dumpSource, + /* Continue hashing what's left, but don't rehash what we + already did. */ + .source2 = rest, + }; auto tempDir = createTempDir(realStoreDir, "add"); delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; if (method == FileIngestionMethod::Recursive) - restorePath(tempPath, *source); + restorePath(tempPath, bothSource); else - writeFile(tempPath, *source); + writeFile(tempPath, bothSource); - } catch (EndOfFile &) { - if (!inMemory) throw; - /* The NAR fits in memory, so we didn't do restorePath(). */ + dump.clear(); } auto [hash, size] = hashSink->finish(); @@ -1143,12 +1130,12 @@ StorePath LocalStore::addToStoreCommon( autoGC(); if (inMemory) { + StringSource dumpSource { dump }; /* Restore from the NAR in memory. */ - StringSource source(dump); if (method == FileIngestionMethod::Recursive) - restorePath(realPath, source); + restorePath(realPath, dumpSource); else - writeFile(realPath, source); + writeFile(realPath, dumpSource); } else { /* Move the temporary path we restored above. */ if (rename(tempPath.c_str(), realPath.c_str())) diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ae23004c4..355c2814f 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -290,10 +290,6 @@ private: specified by the ‘secret-key-files’ option. */ void signPathInfo(ValidPathInfo & info); - StorePath addToStoreCommon( - const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux); - Path getRealStoreDir() override { return realStoreDir; } void createUser(const std::string & userName, uid_t userId) override; diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 141e9e976..4c72dc9f2 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -329,5 +329,18 @@ void StringSink::operator () (const unsigned char * data, size_t len) s->append((const char *) data, len); } +size_t ChainSource::read(unsigned char * data, size_t len) +{ + if (useSecond) { + return source2.read(data, len); + } else { + try { + return source1.read(data, len); + } catch (EndOfFile &) { + useSecond = true; + return this->read(data, len); + } + } +} } diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 6cb9d1bf5..3e3735ca5 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -256,6 +256,19 @@ struct LambdaSource : Source } }; +/* Chain two sources together so after the first is exhausted, the second is + used */ +struct ChainSource : Source +{ + Source & source1, & source2; + bool useSecond = false; + ChainSource(Source & s1, Source & s2) + : source1(s1), source2(s2) + { } + + size_t read(unsigned char * data, size_t len) override; +}; + /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ @@ -271,7 +284,7 @@ static inline std::unique_ptr sinkToSource( throw EndOfFile("coroutine has finished"); }) { - return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); + return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); } From 5602637d9ea195784368e99a226718fc95e6b978 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 23:19:41 +0000 Subject: [PATCH 17/27] Revert "LocalStore::addToStoreFromDump copy in chunks" This reverts commit 592851fb67cd15807109d6f65fb81f6af89af966. We don't need this extra feature anymore --- src/libstore/local-store.cc | 2 +- src/libutil/serialise.cc | 33 +++++++++++++-------------------- src/libutil/serialise.hh | 11 +---------- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 07e1679da..b2b5afadd 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1037,7 +1037,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair) { Path srcPath(absPath(_srcPath)); - auto source = sinkToSource([&](Sink & sink, size_t & wanted) { + auto source = sinkToSource([&](Sink & sink) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 4c72dc9f2..00c945113 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -165,43 +165,35 @@ size_t StringSource::read(unsigned char * data, size_t len) #endif std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof) { struct SinkToSource : Source { - typedef boost::coroutines2::coroutine> coro_t; + typedef boost::coroutines2::coroutine coro_t; - std::function fun; + std::function fun; std::function eof; std::optional coro; bool started = false; - /* It would be nicer to have the co-routines have both args and a - return value, but unfortunately that was removed from Boost's - implementation for some reason, so we use some extra state instead. - */ - size_t wanted = 0; - - SinkToSource(std::function fun, std::function eof) + SinkToSource(std::function fun, std::function eof) : fun(fun), eof(eof) { } - std::basic_string cur; + std::string cur; size_t pos = 0; size_t read(unsigned char * data, size_t len) override { - wanted = len < cur.size() ? 0 : len - cur.size(); - if (!coro) { + if (!coro) coro = coro_t::pull_type([&](coro_t::push_type & yield) { - LambdaSink sink([&](const uint8_t * data, size_t len) { - if (len) yield(std::basic_string { data, len }); + LambdaSink sink([&](const unsigned char * data, size_t len) { + if (len) yield(std::string((const char *) data, len)); }); - fun(sink, wanted); + fun(sink); }); - } if (!*coro) { eof(); abort(); } @@ -211,10 +203,11 @@ std::unique_ptr sinkToSource( pos = 0; } - auto numCopied = cur.copy(data, len, pos); - pos += numCopied; + auto n = std::min(cur.size() - pos, len); + memcpy(data, (unsigned char *) cur.data() + pos, n); + pos += n; - return numCopied; + return n; } }; diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 3e3735ca5..aa6b42597 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -273,19 +273,10 @@ struct ChainSource : Source /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ std::unique_ptr sinkToSource( - std::function fun, - std::function eof = []() { - throw EndOfFile("coroutine has finished"); - }); - -static inline std::unique_ptr sinkToSource( std::function fun, std::function eof = []() { throw EndOfFile("coroutine has finished"); - }) -{ - return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); -} + }); void writePadding(size_t len, Sink & sink); From 3dcca18c30cbc09652f5ac644a9f8750f9ced0c9 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 16 Jul 2020 13:39:03 +0000 Subject: [PATCH 18/27] Fix bug in TeeSource We use this to simplify `LocalStore::addToStoreFromDump`. Also, hope I fixed build error with old clang (used in Darwin CI). --- src/libstore/local-store.cc | 14 ++++---------- src/libutil/serialise.hh | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b2b5afadd..96d10d9ba 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1047,11 +1047,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, } -StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { /* For computing the store path. */ auto hashSink = std::make_unique(hashAlgo); + TeeSource source { source0, *hashSink }; /* Read the source path into memory, but only if it's up to narBufferSize bytes. If it's larger, write it to a temporary @@ -1078,8 +1079,6 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, inMemory = true; break; } - /* Start hashing as we get data */ - (*hashSink)((const uint8_t *) dump.data() + oldSize, got); dump.resize(oldSize + got); } @@ -1087,14 +1086,9 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, Path tempPath; if (!inMemory) { + /* Drain what we pulled so far, and then keep on pulling */ StringSource dumpSource { dump }; - TeeSource rest { source, *hashSink }; - ChainSource bothSource { - .source1 = dumpSource, - /* Continue hashing what's left, but don't rehash what we - already did. */ - .source2 = rest, - }; + ChainSource bothSource { dumpSource, source }; auto tempDir = createTempDir(realStoreDir, "add"); delTempDir = std::make_unique(tempDir); diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index aa6b42597..5d9acf887 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -189,7 +189,7 @@ struct TeeSource : Source size_t read(unsigned char * data, size_t len) { size_t n = orig.read(data, len); - sink(data, len); + sink(data, n); return n; } }; From 4178f36a1d4005a78a024c60d1f024c6ecccf8e8 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Fri, 17 Jul 2020 15:50:53 -0400 Subject: [PATCH 19/27] Test relative store paths --- tests/local-store.sh | 20 ++++++++++++++++++++ tests/local.mk | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/local-store.sh diff --git a/tests/local-store.sh b/tests/local-store.sh new file mode 100644 index 000000000..4ec3d64b0 --- /dev/null +++ b/tests/local-store.sh @@ -0,0 +1,20 @@ +source common.sh + +cd $TEST_ROOT + +echo example > example.txt +mkdir -p ./x + +NIX_STORE_DIR=$TEST_ROOT/x + +CORRECT_PATH=$(nix-store --store ./x --add example.txt) + +PATH1=$(nix path-info --store ./x $CORRECT_PATH) +[ $CORRECT_PATH == $PATH1 ] + +PATH2=$(nix path-info --store "$PWD/x" $CORRECT_PATH) +[ $CORRECT_PATH == $PATH2 ] + +# FIXME we could also test the query parameter version: +# PATH3=$(nix path-info --store "local?store=$PWD/x" $CORRECT_PATH) +# [ $CORRECT_PATH == $PATH3 ] diff --git a/tests/local.mk b/tests/local.mk index 81366160b..0f3bfe606 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -6,7 +6,7 @@ nix_tests = \ gc-auto.sh \ referrers.sh user-envs.sh logging.sh nix-build.sh misc.sh fixed.sh \ gc-runtime.sh check-refs.sh filter-source.sh \ - remote-store.sh export.sh export-graph.sh \ + local-store.sh remote-store.sh export.sh export-graph.sh \ timeout.sh secure-drv-outputs.sh nix-channel.sh \ multiple-outputs.sh import-derivation.sh fetchurl.sh optimise-store.sh \ binary-cache.sh nix-profile.sh repair.sh dump-db.sh case-hack.sh \ From f0100f55909d00f15bfdbef89d6cdcf6c38b2f59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sat, 18 Jul 2020 08:48:13 +0100 Subject: [PATCH 20/27] README: improve development docs --- README.md | 25 +++-------------------- doc/manual/hacking.xml | 45 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index a1588284d..e5f7a694f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ for more details. On Linux and macOS the easiest way to Install Nix is to run the following shell command (as a user other than root): -``` +```console $ curl -L https://nixos.org/nix/install | sh ``` @@ -20,27 +20,8 @@ Information on additional installation methods is available on the [Nix download ## Building And Developing -### Building Nix - -You can build Nix using one of the targets provided by [release.nix](./release.nix): - -``` -$ nix-build ./release.nix -A build.aarch64-linux -$ nix-build ./release.nix -A build.x86_64-darwin -$ nix-build ./release.nix -A build.i686-linux -$ nix-build ./release.nix -A build.x86_64-linux -``` - -### Development Environment - -You can use the provided `shell.nix` to get a working development environment: - -``` -$ nix-shell -$ ./bootstrap.sh -$ ./configure -$ make -``` +See our [Hacking guide](hydra.nixos.org/job/nix/master/build.x86_64-linux/latest/download-by-type/doc/manual#chap-hacking) in our manual for instruction on how to +build nix from source with nix-build or how to get a development environment. ## Additional Resources diff --git a/doc/manual/hacking.xml b/doc/manual/hacking.xml index b671811d3..c0ece76b6 100644 --- a/doc/manual/hacking.xml +++ b/doc/manual/hacking.xml @@ -4,18 +4,37 @@ Hacking -This section provides some notes on how to hack on Nix. To get +This section provides some notes on how to hack on Nix. To get the latest version of Nix from GitHub: -$ git clone git://github.com/NixOS/nix.git +$ git clone https://github.com/NixOS/nix.git $ cd nix -To build it and its dependencies: +To build Nix for the current operating system/architecture use + -$ nix-build release.nix -A build.x86_64-linux +$ nix-build + +or if you have a flakes-enabled nix: + + +$ nix build + + +This will build defaultPackage attribute defined in the flake.nix file. + +To build for other platforms add one of the following suffixes to it: aarch64-linux, +i686-linux, x86_64-darwin, x86_64-linux. + +i.e. + + +nix-build -A defaultPackage.x86_64-linux + + To build all dependencies and start a shell in which all @@ -27,13 +46,27 @@ $ nix-shell To build Nix itself in this shell: [nix-shell]$ ./bootstrap.sh -[nix-shell]$ configurePhase -[nix-shell]$ make +[nix-shell]$ ./configure $configureFlags +[nix-shell]$ make -j $NIX_BUILD_CORES To install it in $(pwd)/inst and test it: [nix-shell]$ make install [nix-shell]$ make installcheck +[nix-shell]$ ./inst/bin/nix --version +nix (Nix) 2.4 + + +If you have a flakes-enabled nix you can replace: + + +$ nix-shell + + +by: + + +$ nix develop From 0ca9744694a5294e995fcddc11f5f195c84036a4 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Mon, 20 Jul 2020 15:56:52 -0400 Subject: [PATCH 21/27] Use heuristics to decide when to show the response Due to https://github.com/NixOS/nix/issues/3841 we don't know how print different messages for different verbosity levels. --- src/libstore/filetransfer.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 4d35bd2b5..4149f8155 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -845,8 +845,11 @@ FileTransferError::FileTransferError(FileTransfer::Error error, std::shared_ptr< : Error(args...), error(error), response(response) { const auto hf = hintfmt(args...); - if (response) { - err.hint = hintfmt("%1%\n\nresponse body:\n\n%2%", normaltxt(hf.str()), *response); + // FIXME: Due to https://github.com/NixOS/nix/issues/3841 we don't know how + // to print different messages for different verbosity levels. For now + // we add some heuristics for detecting when we want to show the response. + if (response && (response->size() < 1024 || response->find("") != string::npos)) { + err.hint = hintfmt("%1%\n\nresponse body:\n\n%2%", normaltxt(hf.str()), *response); } else { err.hint = hf; } From 6633605341c6e01bc72b8311e478ed9932719e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 20 Jul 2020 22:30:39 +0100 Subject: [PATCH 22/27] Update doc/manual/hacking.xml Co-authored-by: Eelco Dolstra --- doc/manual/hacking.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/manual/hacking.xml b/doc/manual/hacking.xml index c0ece76b6..d25d4b84a 100644 --- a/doc/manual/hacking.xml +++ b/doc/manual/hacking.xml @@ -12,7 +12,7 @@ $ cd nix -To build Nix for the current operating system/architecture use +To build Nix for the current operating system/architecture use $ nix-build From 922a845ffc4eaa51797bc376d237c6216f0d8391 Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Tue, 21 Jul 2020 10:24:19 -0400 Subject: [PATCH 23/27] Update chunkSize to the suggested value This was a suggested course of action in a review in one of our earlier commits, https://github.com/NixOS/nix/pull/3801#discussion_r457557079 --- src/libstore/local-store.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 340fb5306..9ea71170f 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1069,7 +1069,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name, or the original source is empty */ while (dump.size() < settings.narBufferSize) { auto oldSize = dump.size(); - constexpr size_t chunkSize = 1024; + constexpr size_t chunkSize = 65536; auto want = std::min(chunkSize, settings.narBufferSize - oldSize); dump.resize(oldSize + want); auto got = 0; From 6cce32c8e8b9559f197f6d3e8814796cfc490582 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 21 Jul 2020 15:39:47 +0000 Subject: [PATCH 24/27] Change logic for deciding what is a relative path for the local store The was Eelco's prefered logic, and it looks good to me! --- src/libstore/store-api.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index d37e970df..67ce6e66c 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -914,12 +914,20 @@ ref openStore(const std::string & uri_, throw Error("don't know how to open Nix store '%s'", uri); } +static bool isNonUriPath(const std::string & spec) { + return + // is not a URL + spec.find("://") == std::string::npos + // Has at least one path separator, and so isn't a single word that + // might be special like "auto" + && spec.find("/") != std::string::npos; +} StoreType getStoreType(const std::string & uri, const std::string & stateDir) { if (uri == "daemon") { return tDaemon; - } else if (uri == "local" || hasPrefix(uri, "/") || hasPrefix(uri, "./")) { + } else if (uri == "local" || isNonUriPath(uri)) { return tLocal; } else if (uri == "" || uri == "auto") { if (access(stateDir.c_str(), R_OK | W_OK) == 0) @@ -943,9 +951,7 @@ static RegisterStoreImplementation regStore([]( return std::shared_ptr(std::make_shared(params)); case tLocal: { Store::Params params2 = params; - if (hasPrefix(uri, "/")) { - params2["root"] = uri; - } else if (hasPrefix(uri, "./")) { + if (isNonUriPath(uri)) { params2["root"] = absPath(uri); } return std::shared_ptr(std::make_shared(params2)); From ae9e9753ce9100ac8ac98fb76d4ac9de45b5734c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20Kemetm=C3=BCller?= Date: Wed, 22 Jul 2020 13:45:15 +0200 Subject: [PATCH 25/27] README: Fix link to hacking guide The link was previously interpreted as if it were relative to the current file. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e5f7a694f..03c5deb7b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Information on additional installation methods is available on the [Nix download ## Building And Developing -See our [Hacking guide](hydra.nixos.org/job/nix/master/build.x86_64-linux/latest/download-by-type/doc/manual#chap-hacking) in our manual for instruction on how to +See our [Hacking guide](https://hydra.nixos.org/job/nix/master/build.x86_64-linux/latest/download-by-type/doc/manual#chap-hacking) in our manual for instruction on how to build nix from source with nix-build or how to get a development environment. ## Additional Resources From c56356baccbf006c7835afa9ce58012631e90b31 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 22 Jul 2020 21:37:54 +0000 Subject: [PATCH 26/27] Separate concerns in `scanForReferences` with TeeSink This also will make it easier to use a `HashModuloSink` instead for CA derivations. --- src/libstore/references.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/libstore/references.cc b/src/libstore/references.cc index a10d536a3..968e2e425 100644 --- a/src/libstore/references.cc +++ b/src/libstore/references.cc @@ -48,13 +48,12 @@ static void search(const unsigned char * s, size_t len, struct RefScanSink : Sink { - HashSink hashSink; StringSet hashes; StringSet seen; string tail; - RefScanSink() : hashSink(htSHA256) { } + RefScanSink() { } void operator () (const unsigned char * data, size_t len); }; @@ -62,8 +61,6 @@ struct RefScanSink : Sink void RefScanSink::operator () (const unsigned char * data, size_t len) { - hashSink(data, len); - /* It's possible that a reference spans the previous and current fragment, so search in the concatenation of the tail of the previous fragment and the start of the current fragment. */ @@ -82,7 +79,9 @@ void RefScanSink::operator () (const unsigned char * data, size_t len) PathSet scanForReferences(const string & path, const PathSet & refs, HashResult & hash) { - RefScanSink sink; + RefScanSink refsSink; + HashSink hashSink { htSHA256 }; + TeeSink sink { refsSink, hashSink }; std::map backMap; /* For efficiency (and a higher hit rate), just search for the @@ -97,7 +96,7 @@ PathSet scanForReferences(const string & path, assert(s.size() == refLength); assert(backMap.find(s) == backMap.end()); // parseHash(htSHA256, s); - sink.hashes.insert(s); + refsSink.hashes.insert(s); backMap[s] = i; } @@ -106,13 +105,13 @@ PathSet scanForReferences(const string & path, /* Map the hashes found back to their store paths. */ PathSet found; - for (auto & i : sink.seen) { + for (auto & i : refsSink.seen) { std::map::iterator j; if ((j = backMap.find(i)) == backMap.end()) abort(); found.insert(j->second); } - hash = sink.hashSink.finish(); + hash = hashSink.finish(); return found; } From b9ead08ca8f3b8e693a4528d0c6f642dcee026fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20M=C3=B6ller?= Date: Thu, 23 Jul 2020 14:21:27 +0200 Subject: [PATCH 27/27] Save changes made by "nix registry pin" to user registry --- src/nix/registry.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nix/registry.cc b/src/nix/registry.cc index 16d7e511f..ebee4545c 100644 --- a/src/nix/registry.cc +++ b/src/nix/registry.cc @@ -111,6 +111,7 @@ struct CmdRegistryPin : virtual Args, EvalCommand fetchers::Attrs extraAttrs; if (ref.subdir != "") extraAttrs["dir"] = ref.subdir; userRegistry->add(ref.input, resolved, extraAttrs); + userRegistry->write(fetchers::getUserRegistryPath()); } };