nix-super/src/libstore/optimise-store.cc

312 lines
9.5 KiB
C++
Raw Normal View History

#include "local-store.hh"
#include "globals.hh"
#include "signals.hh"
#include "posix-fs-canonicalise.hh"
#include "posix-source-accessor.hh"
2014-12-14 02:51:14 +02:00
#include <cstdlib>
2016-11-14 14:37:16 +02:00
#include <cstring>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
2009-09-24 10:39:55 +03:00
#include <stdio.h>
#include <regex>
namespace nix {
static void makeWritable(const Path & path)
{
2020-09-23 20:17:28 +03:00
auto st = lstat(path);
if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1)
throw SysError("changing writability of '%1%'", path);
}
struct MakeReadOnly
{
Path path;
MakeReadOnly(const PathView path) : path(path) { }
~MakeReadOnly()
{
try {
/* This will make the path read-only. */
if (path != "") canonicaliseTimestampAndPermissions(path);
} catch (...) {
ignoreException();
}
}
};
LocalStore::InodeHash LocalStore::loadInodeHash()
{
debug("loading hash inodes in memory");
InodeHash inodeHash;
2017-01-16 23:39:27 +02:00
AutoCloseDir dir(opendir(linksDir.c_str()));
if (!dir) throw SysError("opening directory '%1%'", linksDir);
struct dirent * dirent;
2017-01-16 23:39:27 +02:00
while (errno = 0, dirent = readdir(dir.get())) { /* sic */
checkInterrupt();
// We don't care if we hit non-hash files, anything goes
inodeHash.insert(dirent->d_ino);
}
if (errno) throw SysError("reading directory '%1%'", linksDir);
printMsg(lvlTalkative, "loaded %1% hash inodes", inodeHash.size());
return inodeHash;
}
Strings LocalStore::readDirectoryIgnoringInodes(const Path & path, const InodeHash & inodeHash)
{
Strings names;
2017-01-16 23:39:27 +02:00
AutoCloseDir dir(opendir(path.c_str()));
if (!dir) throw SysError("opening directory '%1%'", path);
struct dirent * dirent;
2017-01-16 23:39:27 +02:00
while (errno = 0, dirent = readdir(dir.get())) { /* sic */
checkInterrupt();
if (inodeHash.count(dirent->d_ino)) {
debug("'%1%' is already linked", dirent->d_name);
continue;
}
std::string name = dirent->d_name;
if (name == "." || name == "..") continue;
names.push_back(name);
}
if (errno) throw SysError("reading directory '%1%'", path);
return names;
}
void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
const Path & path, InodeHash & inodeHash, RepairFlag repair)
{
checkInterrupt();
2014-05-15 12:19:16 +03:00
2020-09-23 20:17:28 +03:00
auto st = lstat(path);
#if __APPLE__
2017-07-30 13:28:50 +03:00
/* HFS/macOS has some undocumented security feature disabling hardlinking for
special files within .app dirs. *.app/Contents/PkgInfo and
*.app/Contents/Resources/\*.lproj seem to be the only paths affected. See
https://github.com/NixOS/nix/issues/1443 for more discussion. */
if (std::regex_search(path, std::regex("\\.app/Contents/.+$")))
2017-08-10 14:51:07 +03:00
{
debug("'%1%' is not allowed to be linked in macOS", path);
return;
}
#endif
if (S_ISDIR(st.st_mode)) {
Strings names = readDirectoryIgnoringInodes(path, inodeHash);
2015-07-17 20:24:28 +03:00
for (auto & i : names)
optimisePath_(act, stats, path + "/" + i, inodeHash, repair);
return;
}
/* We can hard link regular files and maybe symlinks. */
if (!S_ISREG(st.st_mode)
#if CAN_LINK_SYMLINK
&& !S_ISLNK(st.st_mode)
#endif
) return;
/* Sometimes SNAFUs can cause files in the Nix store to be
modified, in particular when running programs as root under
NixOS (example: $fontconfig/var/cache being modified). Skip
those files. FIXME: check the modification time. */
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-21 01:27:36 +02:00
warn("skipping suspicious writable file '%1%'", path);
return;
}
/* This can still happen on top-level files. */
if (st.st_nlink > 1 && inodeHash.count(st.st_ino)) {
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-21 01:27:36 +02:00
debug("'%s' is already linked, with %d other file(s)", path, st.st_nlink - 2);
return;
}
/* Hash the file. Note that hashPath() returns the hash over the
NAR serialisation, which includes the execute bit on the file.
Thus, executable and non-executable files with the same
contents *won't* be linked (which is good because otherwise the
permissions would be screwed up).
Also note that if `path' is a symlink, then we're hashing the
contents of the symlink (i.e. the result of readlink()), not
the contents of the target (which may not even exist). */
Hash hash = ({
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
});
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));
/* Check if this is a known hash. */
Path linkPath = linksDir + "/" + hash.to_string(HashFormat::Nix32, false);
/* Maybe delete the link, if it has been corrupted. */
if (pathExists(linkPath)) {
auto stLink = lstat(linkPath);
if (st.st_size != stLink.st_size
|| (repair && hash != ({
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
})))
{
// XXX: Consider overwriting linkPath with our valid version.
warn("removing corrupted link '%s'", linkPath);
warn("There may be more corrupted paths."
"\nYou should run `nix-store --verify --check-contents --repair` to fix them all");
unlink(linkPath.c_str());
}
}
if (!pathExists(linkPath)) {
/* Nope, create a hard link in the links directory. */
if (link(path.c_str(), linkPath.c_str()) == 0) {
inodeHash.insert(st.st_ino);
return;
2014-05-15 12:19:16 +03:00
}
switch (errno) {
case EEXIST:
/* Fall through if another process created linkPath before
we did. */
break;
case ENOSPC:
/* On ext4, that probably means the directory index is
full. When that happens, it's fine to ignore it: we
just effectively disable deduplication of this
file. */
printInfo("cannot link '%s' to '%s': %s", linkPath, path, strerror(errno));
return;
default:
throw SysError("cannot link '%1%' to '%2%'", linkPath, path);
}
}
/* Yes! We've seen a file with the same contents. Replace the
current file with a hard link to that file. */
2020-09-23 20:17:28 +03:00
auto stLink = lstat(linkPath);
if (st.st_ino == stLink.st_ino) {
debug("'%1%' is already linked to '%2%'", path, linkPath);
return;
}
printMsg(lvlTalkative, "linking '%1%' to '%2%'", path, linkPath);
/* Make the containing directory writable, but only if it's not
the store itself (we don't want or need to mess with its
permissions). */
const Path dirOfPath(dirOf(path));
bool mustToggle = dirOfPath != realStoreDir.get();
if (mustToggle) makeWritable(dirOfPath);
/* When we're done, make the directory read-only again and reset
its timestamp back to 0. */
MakeReadOnly makeReadOnly(mustToggle ? dirOfPath : "");
Path tempLink = fmt("%1%/.tmp-link-%2%-%3%", realStoreDir, getpid(), random());
if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
if (errno == EMLINK) {
/* Too many links to the same file (>= 32000 on most file
systems). This is likely to happen with empty files.
Just shrug and ignore. */
if (st.st_size)
printInfo("'%1%' has maximum number of links", linkPath);
return;
}
throw SysError("cannot link '%1%' to '%2%'", tempLink, linkPath);
}
/* Atomically replace the old file with the new hard link. */
try {
renameFile(tempLink, path);
} catch (SystemError & e) {
if (unlink(tempLink.c_str()) == -1)
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-21 01:27:36 +02:00
printError("unable to unlink '%1%'", tempLink);
if (errno == EMLINK) {
/* Some filesystems generate too many links on the rename,
rather than on the original link. (Probably it
temporarily increases the st_nlink field before
decreasing it again.) */
debug("'%s' has reached maximum number of links", linkPath);
return;
}
throw;
}
stats.filesLinked++;
stats.bytesFreed += st.st_size;
stats.blocksFreed += st.st_blocks;
if (act)
act->result(resFileLinked, st.st_size, st.st_blocks);
}
void LocalStore::optimiseStore(OptimiseStats & stats)
{
Activity act(*logger, actOptimiseStore);
auto paths = queryAllValidPaths();
InodeHash inodeHash = loadInodeHash();
act.progress(0, paths.size());
uint64_t done = 0;
2015-07-17 20:24:28 +03:00
for (auto & i : paths) {
addTempRoot(i);
if (!isValidPath(i)) continue; /* path was GC'ed, probably */
{
Activity act(*logger, lvlTalkative, actUnknown, fmt("optimising path '%s'", printStorePath(i)));
optimisePath_(&act, stats, realStoreDir + "/" + std::string(i.to_string()), inodeHash, NoRepair);
}
done++;
act.progress(done, paths.size());
}
}
void LocalStore::optimiseStore()
{
OptimiseStats stats;
optimiseStore(stats);
2020-10-06 11:40:49 +03:00
printInfo("%s freed by hard-linking %d files",
showBytes(stats.bytesFreed),
stats.filesLinked);
}
void LocalStore::optimisePath(const Path & path, RepairFlag repair)
{
OptimiseStats stats;
InodeHash inodeHash;
if (settings.autoOptimiseStore) optimisePath_(nullptr, stats, path, inodeHash, repair);
}
}