Add fsync-store-paths option

- Add recursiveSync function to flush a directory tree to disk

- Add AutoCloseFD::startFsync to initiate an asynchronous fsync
  without waiting for the result

- Initiate an asynchronous fsync while extracting NAR files

- Implement the fsync-store-paths option in LocalStore
This commit is contained in:
squalus 2022-10-04 00:47:43 -07:00
parent 1437582ccd
commit 5987fb7459
7 changed files with 102 additions and 10 deletions

View file

@ -234,6 +234,13 @@ public:
default is `true`.
)"};
/* Opt-in durability switch (default `false`): when set, store paths are
   flushed to disk before they are registered. */
Setting<bool> fsyncStorePaths{this, false, "fsync-store-paths",
R"(
Whether to call `fsync()` on store paths before registering them, to
flush them to disk. This improves robustness in case of system crashes,
but reduces performance. The default is `false`.
)"};
Setting<bool> useSQLiteWAL{this, !isWSL1(), "use-sqlite-wal",
"Whether SQLite should use WAL mode."};

View file

@ -1299,7 +1299,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
TeeSource wrapperSource { source, hashSink };
restorePath(realPath, wrapperSource);
restorePath(realPath, wrapperSource, settings.fsyncStorePaths);
auto hashResult = hashSink.finish();
@ -1342,6 +1342,11 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
optimisePath(realPath, repair); // FIXME: combine with hashPath()
if (settings.fsyncStorePaths) {
recursiveSync(realPath);
syncParent(realPath);
}
registerValidPath(info);
}
@ -1402,7 +1407,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
tempPath = tempDir + "/x";
if (method == FileIngestionMethod::Recursive)
restorePath(tempPath, bothSource);
restorePath(tempPath, bothSource, settings.fsyncStorePaths);
else
writeFile(tempPath, bothSource);
@ -1434,7 +1439,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
StringSource dumpSource { dump };
/* Restore from the NAR in memory. */
if (method == FileIngestionMethod::Recursive)
restorePath(realPath, dumpSource);
restorePath(realPath, dumpSource, settings.fsyncStorePaths);
else
writeFile(realPath, dumpSource);
} else {
@ -1459,6 +1464,12 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
info.narSize = narHash.second;
info.references = references;
info.ca = FixedOutputHash { .method = method, .hash = hash };
if (settings.fsyncStorePaths) {
recursiveSync(realPath);
syncParent(realPath);
}
registerValidPath(info);
}
@ -1491,7 +1502,7 @@ StorePath LocalStore::addTextToStore(
autoGC();
writeFile(realPath, s);
writeFile(realPath, s, 0666, settings.fsyncStorePaths);
canonicalisePathMetaData(realPath, {});
@ -1505,6 +1516,10 @@ StorePath LocalStore::addTextToStore(
info.narSize = sink.s.size();
info.references = references;
info.ca = TextHash { .hash = hash };
if (settings.fsyncStorePaths)
syncParent(realPath);
registerValidPath(info);
}

View file

@ -306,6 +306,9 @@ struct RestoreSink : ParseSink
{
Path dstPath;
AutoCloseFD fd;
/* When true, closeRegularFile() calls fd.startFsync() on each restored
   regular file just before closing it. */
bool startFsync;
/* Stores `startFsync` unchanged; the flag is consulted in closeRegularFile(). */
explicit RestoreSink(bool startFsync) : startFsync{startFsync} {}
void createDirectory(const Path & path) override
{
@ -323,6 +326,10 @@ struct RestoreSink : ParseSink
void closeRegularFile() override
{
    /* Optionally kick off the flush now, without waiting for it to finish.
       This only gives the disk a head start: the real fsync still has to
       run before the store path is registered. */
    if (startFsync) {
        fd.startFsync();
    }

    /* An explicit close makes sure any error it reports is checked. */
    fd.close();
}
@ -367,9 +374,9 @@ struct RestoreSink : ParseSink
};
/* Unpack the NAR read from `source` by running parseDump() over a
   RestoreSink whose `dstPath` is `path`. `startFsync` is handed to the
   sink, which uses it in closeRegularFile() to decide whether to start an
   early, non-blocking flush of each regular file. */
void restorePath(const Path & path, Source & source)
void restorePath(const Path & path, Source & source, bool startFsync)
{
RestoreSink sink;
RestoreSink sink { startFsync };
sink.dstPath = path;
parseDump(sink, source);
}

View file

@ -95,7 +95,7 @@ struct RetrieveRegularNARSink : ParseSink
void parseDump(ParseSink & sink, Source & source);
void restorePath(const Path & path, Source & source);
void restorePath(const Path & path, Source & source, bool startFsync = false);
/* Read a NAR from 'source' and write it to 'sink'. */
void copyNAR(Source & source, Sink & sink);

View file

@ -1,6 +1,7 @@
#include <sys/time.h>
#include <filesystem>
#include <atomic>
#include <deque>
#include "finally.hh"
#include "util.hh"
@ -170,4 +171,47 @@ void moveFile(const Path & oldName, const Path & newName)
}
}
void recursiveSync(const Path & path)
{
/* If it's a file, just fsync and return */
auto st = lstat(path);
if (S_ISREG(st.st_mode)) {
AutoCloseFD fd = open(path.c_str(), O_RDONLY, 0);
if (!fd)
throw SysError("opening file '%1%'", path);
fd.fsync();
return;
}
/* Otherwise, perform a depth-first traversal of the directory and fsync all the files */
std::deque<Path> dirsToEnumerate;
dirsToEnumerate.push_back(path);
std::vector<Path> dirsToFsync;
while (!dirsToEnumerate.empty()) {
auto currentDir = dirsToEnumerate.back();
dirsToEnumerate.pop_back();
const auto dirEntries = readDirectory(currentDir);
for (const auto& dirEntry : dirEntries) {
auto entryPath = currentDir + "/" + dirEntry.name;
if (dirEntry.type == DT_DIR) {
dirsToEnumerate.emplace_back(std::move(entryPath));
} else if (dirEntry.type == DT_REG) {
AutoCloseFD fd = open(entryPath.c_str(), O_RDONLY, 0);
if (!fd)
throw SysError("opening file '%1%'", entryPath);
fd.fsync();
}
}
dirsToFsync.emplace_back(std::move(currentDir));
}
/* fsync all the directories */
for (auto dir = dirsToFsync.rbegin(); dir != dirsToFsync.rend(); ++dir) {
AutoCloseFD fd = open(dir->c_str(), O_RDONLY, 0);
if (!fd)
throw SysError("opening directory '%1%'", *dir);
fd.fsync();
}
}
}

View file

@ -11,6 +11,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <future>
#include <iostream>
#include <mutex>
@ -838,7 +839,7 @@ void AutoCloseFD::close()
}
}
void AutoCloseFD::fsync()
void AutoCloseFD::fsync() const
{
if (fd != -1) {
int result;
@ -853,6 +854,17 @@ void AutoCloseFD::fsync()
}
void AutoCloseFD::startFsync() const
{
#if __linux__
    /* Nothing to flush when no descriptor is held. */
    if (fd == -1)
        return;

    /* Best effort only: the result is deliberately dropped, because a real
       fsync has to be run later regardless. This merely lets writeback
       begin early. */
    ::sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#endif
}
AutoCloseFD::operator bool() const
{
return fd != -1;

View file

@ -119,9 +119,12 @@ void writeFile(const Path & path, std::string_view s, mode_t mode = 0666, bool s
void writeFile(const Path & path, Source & source, mode_t mode = 0666, bool sync = false);
/* Flush a file's parent directory to disk */
/* Flush a path's parent directory to disk */
void syncParent(const Path & path);
/* Flush a file or entire directory tree to disk */
void recursiveSync(const Path & path);
/* Read a line from a file descriptor. */
std::string readLine(int fd);
@ -234,7 +237,11 @@ public:
explicit operator bool() const;
int release();
void close();
void fsync();
/* Perform a blocking fsync operation */
void fsync() const;
/* Asynchronously flush to disk without blocking, if available on the platform. This is just a performance
* optimization, and fsync must be run later even if this is called. */
void startFsync() const;
};