From 57db3be9e448042814d386def2d8af16a2a4c4b9 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 20 Oct 2023 16:36:41 +0200 Subject: [PATCH] SourceAccessor::readFile(): Support reading into a sink --- src/libfetchers/fs-input-accessor.cc | 7 +++- src/libfetchers/input-accessor.cc | 2 +- src/libutil/archive.cc | 15 ++++--- src/libutil/source-accessor.cc | 58 +++++++++++++++++++++++++++- src/libutil/source-accessor.hh | 25 +++++++++++- 5 files changed, 94 insertions(+), 13 deletions(-) diff --git a/src/libfetchers/fs-input-accessor.cc b/src/libfetchers/fs-input-accessor.cc index e40faf03f..3444c4643 100644 --- a/src/libfetchers/fs-input-accessor.cc +++ b/src/libfetchers/fs-input-accessor.cc @@ -19,11 +19,14 @@ struct FSInputAccessorImpl : FSInputAccessor, PosixSourceAccessor { } - std::string readFile(const CanonPath & path) override + void readFile( + const CanonPath & path, + Sink & sink, + std::function<void(uint64_t)> sizeCallback) override { auto absPath = makeAbsPath(path); checkAllowed(absPath); - return PosixSourceAccessor::readFile(absPath); + PosixSourceAccessor::readFile(absPath, sink, sizeCallback); } bool pathExists(const CanonPath & path) override diff --git a/src/libfetchers/input-accessor.cc b/src/libfetchers/input-accessor.cc index 488350849..d1d450cf7 100644 --- a/src/libfetchers/input-accessor.cc +++ b/src/libfetchers/input-accessor.cc @@ -17,7 +17,7 @@ StorePath InputAccessor::fetchToStore( if (method == FileIngestionMethod::Recursive) dumpPath(path, sink, filter ? *filter : defaultPathFilter); else - sink(readFile(path)); // FIXME: stream + readFile(path, sink); }); auto storePath = diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 6508ba807..0cd54e5db 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -44,12 +44,15 @@ void SourceAccessor::dumpPath( { auto dumpContents = [&](const CanonPath & path) { - /* It would be nice if this was streaming, but we need the - size before the contents. 
*/ - auto s = readFile(path); - sink << "contents" << s.size(); - sink(s); - writePadding(s.size(), sink); + sink << "contents"; + std::optional<uint64_t> size; + readFile(path, sink, [&](uint64_t _size) + { + size = _size; + sink << _size; + }); + assert(size); + writePadding(*size, sink); }; std::function dump; diff --git a/src/libutil/source-accessor.cc b/src/libutil/source-accessor.cc index d5c8cbcdd..2d03d3d7a 100644 --- a/src/libutil/source-accessor.cc +++ b/src/libutil/source-accessor.cc @@ -10,6 +10,28 @@ SourceAccessor::SourceAccessor() { } +std::string SourceAccessor::readFile(const CanonPath & path) +{ + StringSink sink; + std::optional<uint64_t> size; + readFile(path, sink, [&](uint64_t _size) + { + size = _size; + }); + assert(size && *size == sink.s.size()); + return std::move(sink.s); +} + +void SourceAccessor::readFile( + const CanonPath & path, + Sink & sink, + std::function<void(uint64_t)> sizeCallback) +{ + auto s = readFile(path); + sizeCallback(s.size()); + sink(s); +} + Hash SourceAccessor::hashPath( const CanonPath & path, PathFilter & filter, @@ -33,9 +55,41 @@ std::string SourceAccessor::showPath(const CanonPath & path) return path.abs(); } -std::string PosixSourceAccessor::readFile(const CanonPath & path) +void PosixSourceAccessor::readFile( + const CanonPath & path, + Sink & sink, + std::function<void(uint64_t)> sizeCallback) { - return nix::readFile(path.abs()); + // FIXME: add O_NOFOLLOW since symlinks should be resolved by the + // caller? 
+ AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) + throw SysError("opening file '%1%'", path); + + struct stat st; + if (fstat(fd.get(), &st) == -1) + throw SysError("statting file"); + + sizeCallback(st.st_size); + + off_t left = st.st_size; + + std::vector<unsigned char> buf(64 * 1024); + while (left) { + checkInterrupt(); + ssize_t rd = read(fd.get(), buf.data(), (size_t) std::min(left, (off_t) buf.size())); + if (rd == -1) { + if (errno != EINTR) + throw SysError("reading from file '%s'", showPath(path)); + } + else if (rd == 0) + throw SysError("unexpected end-of-file reading '%s'", showPath(path)); + else { + assert(rd <= left); + sink({(char *) buf.data(), (size_t) rd}); + left -= rd; + } + } } bool PosixSourceAccessor::pathExists(const CanonPath & path) diff --git a/src/libutil/source-accessor.hh b/src/libutil/source-accessor.hh index 53408eb6c..f3504c9bb 100644 --- a/src/libutil/source-accessor.hh +++ b/src/libutil/source-accessor.hh @@ -5,6 +5,8 @@ namespace nix { +struct Sink; + /** * A read-only filesystem abstraction. This is used by the Nix * evaluator and elsewhere for accessing sources in various @@ -20,7 +22,23 @@ struct SourceAccessor virtual ~SourceAccessor() { } - virtual std::string readFile(const CanonPath & path) = 0; + /** + * Return the contents of a file as a string. + */ + virtual std::string readFile(const CanonPath & path); + + /** + * Write the contents of a file to a sink. `sizeCallback` must be + * called with the size of the file before any data is written to + * the sink. + * + * Note: subclasses of `SourceAccessor` need to implement at least + * one of the `readFile()` variants. 
+ */ + virtual void readFile( + const CanonPath & path, + Sink & sink, + std::function<void(uint64_t)> sizeCallback = [](uint64_t size){}); virtual bool pathExists(const CanonPath & path) = 0; @@ -97,7 +115,10 @@ struct PosixSourceAccessor : SourceAccessor */ time_t mtime = 0; - std::string readFile(const CanonPath & path) override; + void readFile( + const CanonPath & path, + Sink & sink, + std::function<void(uint64_t)> sizeCallback) override; bool pathExists(const CanonPath & path) override;