Merge pull request #9881 from obsidiansystems/purify-canon-path

Purify `CanonPath`
This commit is contained in:
John Ericson 2024-02-16 10:11:11 -05:00 committed by GitHub
commit 60936f28e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 155 additions and 57 deletions

View file

@ -1,16 +1,25 @@
#include "canon-path.hh"
#include "file-system.hh"
#include "util.hh"
#include "file-path-impl.hh"
namespace nix {
CanonPath CanonPath::root = CanonPath("/");
/**
 * Canonicalise `path` by running `canonPathInner` with a hook that
 * does nothing, so each component is taken exactly as written.
 */
static std::string absPathPure(std::string_view path)
{
    /* No-op per-component hook: leaves both the result so far and the
       remaining input untouched. */
    auto ignoreComponent = [](auto &, auto &) { };
    return canonPathInner(path, ignoreComponent);
}
CanonPath::CanonPath(std::string_view raw)
: path(absPath(raw, "/"))
: path(absPathPure(concatStrings("/", raw)))
{ }
CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
: path(absPath(raw, root.abs()))
: path(absPathPure(
raw.size() > 0 && raw[0] == '/'
? raw
: concatStrings(root.abs(), "/", raw)))
{ }
CanonPath::CanonPath(const std::vector<std::string> & elems)

View file

@ -21,9 +21,21 @@ namespace nix {
*
* - There are no components equal to '.' or '..'.
*
* Note that the path does not need to correspond to an actually
* existing path, and there is no guarantee that symlinks are
* resolved.
* `CanonPath`s are "virtual" Nix paths for abstract file system objects;
* they are always Unix-style paths, regardless of what OS Nix is
* running on. The `/` root doesn't denote the ambient host file system
* root, but some virtual FS root.
*
* @note It might be useful to compare `openat(some_fd, "foo/bar")` on
* Unix. `"foo/bar"` is a relative path because an absolute path would
* "override" the `some_fd` directory file descriptor and escape to the
* "system root". Conversely, Nix's abstract file operations *never* escape the
* designated virtual file system (i.e. `SourceAccessor` or
* `ParseSink`), so `CanonPath` does not need an absolute/relative
* distinction.
*
* @note The path does not need to correspond to an actually existing
* path, and the path may or may not have unresolved symlinks.
*/
class CanonPath
{

View file

@ -0,0 +1,81 @@
#pragma once
/**
* @file
*
* Pure (no IO) infrastructure just for defining other path types;
* should not be used directly outside of utilities.
*/
#include <cassert>
#include <string>
#include <string_view>
namespace nix {
/**
 * Core pure path canonicalization algorithm.
 *
 * Walks `remaining` component by component and builds the result as a
 * sequence of `/`-prefixed components: runs of slashes are collapsed,
 * `.` components are dropped, and `..` removes the last component
 * collected so far (doing nothing when there is none).
 *
 * @param remaining
 * The path to canonicalize. Must not be empty (checked with `assert`).
 *
 * @param hookComponent
 * A callback invoked each time a normal component has been appended.
 * It is passed two arguments, references to
 *
 * 1. the result so far
 *
 * 2. the remaining path to resolve
 *
 * This is a chance to modify those two paths in arbitrary way, e.g. if
 * "result" points to a symlink.
 *
 * @return The canonicalized path; `"/"` if no component is left.
 */
template<typename HookComponent>
std::string canonPathInner(
    std::string_view remaining,
    HookComponent && hookComponent)
{
    assert(!remaining.empty());

    std::string result;
    result.reserve(256);

    while (true) {

        /* Skip slashes. */
        while (!remaining.empty() && remaining[0] == '/')
            remaining.remove_prefix(1);

        if (remaining.empty()) break;

        /* Everything up to the next slash, or the whole rest if there
           is none (`substr` clamps a count of `npos`). */
        const std::string_view nextComp =
            remaining.substr(0, remaining.find('/'));

        /* Ignore `.'. */
        if (nextComp == ".")
            remaining.remove_prefix(1);

        /* If `..', delete the last component. */
        else if (nextComp == "..") {
            /* The hook may have rewritten `result` arbitrarily, so do
               not assume it still contains a slash. */
            if (const auto lastSep = result.rfind('/'); lastSep != result.npos)
                result.erase(lastSep);
            else
                result.clear();
            remaining.remove_prefix(2);
        }

        /* Normal component; copy it. */
        else {
            result += '/';
            result += nextComp;
            remaining.remove_prefix(nextComp.size());
            hookComponent(result, remaining);
        }
    }

    if (result.empty())
        result = "/";

    return result;
}
}

View file

@ -1,5 +1,6 @@
#include "environment-variables.hh"
#include "file-system.hh"
#include "file-path-impl.hh"
#include "signals.hh"
#include "finally.hh"
#include "serialise.hh"
@ -21,11 +22,18 @@ namespace fs = std::filesystem;
namespace nix {
/**
 * Decide from the first character alone whether `path` looks absolute.
 *
 * @return `true` if `path` is non-empty and begins with `/`; `false`
 * otherwise, including for the empty string.
 */
static bool isAbsolute(PathView path)
{
    if (path.empty())
        return false;
    return path[0] == '/';
}
Path absPath(PathView path, std::optional<PathView> dir, bool resolveSymlinks)
{
std::string scratch;
if (path.empty() || path[0] != '/') {
if (!isAbsolute(path)) {
// In this case we need to call `canonPath` on a newly-created
// string. We set `scratch` to that string first, and then set
// `path` to `scratch`. This ensures the newly-created string
@ -58,69 +66,39 @@ Path canonPath(PathView path, bool resolveSymlinks)
{
    /* Canonicalise `path`, which must be non-empty and absolute. The
       lexical work is done by `canonPathInner`; the hook below follows
       symlinks when `resolveSymlinks` is set. */
    assert(path != "");

    if (!isAbsolute(path))
        throw Error("not an absolute path: '%1%'", path);

    /* This just exists because we cannot set the target of `remaining`
       (the callback parameter) directly to a newly-constructed string,
       since it is `std::string_view`. */
    std::string temp;

    /* Count the number of times we follow a symlink and stop at some
       arbitrary (but high) limit to prevent infinite loops. */
    unsigned int followCount = 0, maxFollow = 1024;

    return canonPathInner(
        path,
        [&followCount, &temp, maxFollow, resolveSymlinks]
        (std::string & result, std::string_view & remaining) {
            /* If `result` points to a symlink, resolve it and continue
               from there. */
            if (resolveSymlinks && isLink(result)) {
                if (++followCount >= maxFollow)
                    /* Format placeholders are numbered from 1. */
                    throw Error("infinite symlink recursion in path '%1%'", remaining);
                remaining = (temp = concatStrings(readLink(result), remaining));
                if (isAbsolute(remaining)) {
                    /* restart for symlinks pointing to absolute path */
                    result.clear();
                } else {
                    result = dirOf(result);
                    if (result == "/") {
                        /* we don't want trailing slashes here, which `dirOf`
                           only produces if `result = /` */
                        result.clear();
                    }
                }
            }
        });
}

View file

@ -41,6 +41,24 @@ namespace nix {
}
}
/* Building a `CanonPath` from a raw string plus an existing `CanonPath`
   used as the root for relative input. */
TEST(CanonPath, from_existing) {
    CanonPath base("foo//bar/");

    /* Raw path starting with `/`: the result does not include `base`. */
    CanonPath fromAbsolute("/baz//quux/", base);
    ASSERT_EQ(fromAbsolute.abs(), "/baz/quux");
    ASSERT_EQ(fromAbsolute.rel(), "baz/quux");
    ASSERT_EQ(*fromAbsolute.baseName(), "quux");
    ASSERT_EQ(*fromAbsolute.dirOf(), "/baz");

    /* Raw path without a leading `/`: appended below `base`. */
    CanonPath fromRelative("baz//quux/", base);
    ASSERT_EQ(fromRelative.abs(), "/foo/bar/baz/quux");
    ASSERT_EQ(fromRelative.rel(), "foo/bar/baz/quux");
    ASSERT_EQ(*fromRelative.baseName(), "quux");
    ASSERT_EQ(*fromRelative.dirOf(), "/foo/bar/baz");
}
TEST(CanonPath, pop) {
CanonPath p("foo/bar/x");
ASSERT_EQ(p.abs(), "/foo/bar/x");