From 5987fb7459e42ee970b22a9e7d896fc415321743 Mon Sep 17 00:00:00 2001 From: squalus Date: Tue, 4 Oct 2022 00:47:43 -0700 Subject: [PATCH] Add fsync-store-paths option - Add recursiveSync function to flush a directory tree to disk - Add AutoCloseFD::startFsync to initiate an asynchronous fsync without waiting for the result - Initiate an asynchronous fsync while extracting NAR files - Implement the fsync-store-paths option in LocalStore --- src/libstore/globals.hh | 7 ++++++ src/libstore/local-store.cc | 23 +++++++++++++++---- src/libutil/archive.cc | 11 ++++++++-- src/libutil/archive.hh | 2 +- src/libutil/filesystem.cc | 44 +++++++++++++++++++++++++++++++++++++ src/libutil/util.cc | 14 +++++++++++- src/libutil/util.hh | 11 ++++++++-- 7 files changed, 102 insertions(+), 10 deletions(-) diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 274a15dd7..fd5cce7ad 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -234,6 +234,13 @@ public: default is `true`. )"}; + Setting fsyncStorePaths{this, false, "fsync-store-paths", + R"( + "Whether to call `fsync()` on store paths before registering them, to + flush them to disk. This improves robustness in case of system crashes, + but reduces performance. The default is `false`. + )"}; + Setting useSQLiteWAL{this, !isWSL1(), "use-sqlite-wal", "Whether SQLite should use WAL mode."}; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b67668e52..4bbeebc3a 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1299,7 +1299,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, TeeSource wrapperSource { source, hashSink }; - restorePath(realPath, wrapperSource); + restorePath(realPath, wrapperSource, settings.fsyncStorePaths); auto hashResult = hashSink.finish(); @@ -1342,6 +1342,11 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, optimisePath(realPath, repair); // FIXME: combine with hashPath() + if (settings.fsyncStorePaths) { + recursiveSync(realPath); + syncParent(realPath); + } + registerValidPath(info); } @@ -1402,7 +1407,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name tempPath = tempDir + "/x"; if (method == FileIngestionMethod::Recursive) - restorePath(tempPath, bothSource); + restorePath(tempPath, bothSource, settings.fsyncStorePaths); else writeFile(tempPath, bothSource); @@ -1434,7 +1439,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name StringSource dumpSource { dump }; /* Restore from the NAR in memory. */ if (method == FileIngestionMethod::Recursive) - restorePath(realPath, dumpSource); + restorePath(realPath, dumpSource, settings.fsyncStorePaths); else writeFile(realPath, dumpSource); } else { @@ -1459,6 +1464,12 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name info.narSize = narHash.second; info.references = references; info.ca = FixedOutputHash { .method = method, .hash = hash }; + + if (settings.fsyncStorePaths) { + recursiveSync(realPath); + syncParent(realPath); + } + registerValidPath(info); } @@ -1491,7 +1502,7 @@ StorePath LocalStore::addTextToStore( autoGC(); - writeFile(realPath, s); + writeFile(realPath, s, 0666, settings.fsyncStorePaths); canonicalisePathMetaData(realPath, {}); @@ -1505,6 +1516,10 @@ StorePath LocalStore::addTextToStore( info.narSize = sink.s.size(); info.references = references; info.ca = TextHash { .hash = hash }; + + if (settings.fsyncStorePaths) + syncParent(realPath); + registerValidPath(info); } diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 0e2b9d12c..e85fe3d3f 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -306,6 +306,9 @@ struct RestoreSink : ParseSink { Path dstPath; AutoCloseFD fd; + bool startFsync; + + explicit RestoreSink(bool startFsync) : startFsync{startFsync} {} void createDirectory(const Path & path) override { @@ -323,6 +326,10 @@ struct RestoreSink : ParseSink void closeRegularFile() override { + /* Initiate an fsync operation without waiting for the result. The real fsync should be run before registering + a store path, but this is a performance optimization to allow the disk write to start early. */ + if (startFsync) + fd.startFsync(); /* Call close explicitly to make sure the error is checked */ fd.close(); } @@ -367,9 +374,9 @@ struct RestoreSink : ParseSink }; -void restorePath(const Path & path, Source & source) +void restorePath(const Path & path, Source & source, bool startFsync) { - RestoreSink sink; + RestoreSink sink { startFsync }; sink.dstPath = path; parseDump(sink, source); } diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index e42dea540..64b3501b6 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -95,7 +95,7 @@ struct RetrieveRegularNARSink : ParseSink void parseDump(ParseSink & sink, Source & source); -void restorePath(const Path & path, Source & source); +void restorePath(const Path & path, Source & source, bool startFsync = false); /* Read a NAR from 'source' and write it to 'sink'. */ void copyNAR(Source & source, Sink & sink); diff --git a/src/libutil/filesystem.cc b/src/libutil/filesystem.cc index 3a732cff8..5666fc809 100644 --- a/src/libutil/filesystem.cc +++ b/src/libutil/filesystem.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include "finally.hh" #include "util.hh" @@ -170,4 +171,47 @@ void moveFile(const Path & oldName, const Path & newName) } } +void recursiveSync(const Path & path) +{ + /* If it's a file, just fsync and return */ + auto st = lstat(path); + if (S_ISREG(st.st_mode)) { + AutoCloseFD fd = open(path.c_str(), O_RDONLY, 0); + if (!fd) + throw SysError("opening file '%1%'", path); + fd.fsync(); + return; + } + + /* Otherwise, perform a depth-first traversal of the directory and fsync all the files */ + std::deque dirsToEnumerate; + dirsToEnumerate.push_back(path); + std::vector dirsToFsync; + while (!dirsToEnumerate.empty()) { + auto currentDir = dirsToEnumerate.back(); + dirsToEnumerate.pop_back(); + const auto dirEntries = readDirectory(currentDir); + for (const auto& dirEntry : dirEntries) { + auto entryPath = currentDir + "/" + dirEntry.name; + if (dirEntry.type == DT_DIR) { + dirsToEnumerate.emplace_back(std::move(entryPath)); + } else if (dirEntry.type == DT_REG) { + AutoCloseFD fd = open(entryPath.c_str(), O_RDONLY, 0); + if (!fd) + throw SysError("opening file '%1%'", entryPath); + fd.fsync(); + } + } + dirsToFsync.emplace_back(std::move(currentDir)); + } + + /* fsync all the directories */ + for (auto dir = dirsToFsync.rbegin(); dir != dirsToFsync.rend(); ++dir) { + AutoCloseFD fd = open(dir->c_str(), O_RDONLY, 0); + if (!fd) + throw SysError("opening directory '%1%'", *dir); + fd.fsync(); + } +} + } diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 993dc1cb6..383288667 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -838,7 +839,7 @@ void AutoCloseFD::close() } } -void AutoCloseFD::fsync() +void AutoCloseFD::fsync() const { if (fd != -1) { int result; @@ -853,6 +854,17 @@ void AutoCloseFD::fsync() } +void AutoCloseFD::startFsync() const +{ +#if __linux__ + if (fd != -1) { + /* Ignore failure, since fsync must be run later anyway. This is just a performance optimization. */ + ::sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); + } +#endif +} + + AutoCloseFD::operator bool() const { return fd != -1; diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 9b149de80..ea83351a7 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -119,9 +119,12 @@ void writeFile(const Path & path, std::string_view s, mode_t mode = 0666, bool s void writeFile(const Path & path, Source & source, mode_t mode = 0666, bool sync = false); -/* Flush a file's parent directory to disk */ +/* Flush a path's parent directory to disk */ void syncParent(const Path & path); +/* Flush a file or entire directory tree to disk */ +void recursiveSync(const Path & path); + /* Read a line from a file descriptor. */ std::string readLine(int fd); @@ -234,7 +237,11 @@ public: explicit operator bool() const; int release(); void close(); - void fsync(); + /* Perform a blocking fsync operation */ + void fsync() const; + /* Asynchronously flush to disk without blocking, if available on the platform. This is just a performance + * optimization, and fsync must be run later even if this is called. */ + void startFsync() const; };