Merge pull request #9767 from obsidiansystems/canon-path-split

Support Windows paths in `canonPath` and `absPath`
This commit is contained in:
Eelco Dolstra 2024-02-27 17:12:29 +01:00 committed by GitHub
commit c3e9e3d0c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 138 additions and 24 deletions

View file

@ -8,7 +8,7 @@ CanonPath CanonPath::root = CanonPath("/");
static std::string absPathPure(std::string_view path) static std::string absPathPure(std::string_view path)
{ {
return canonPathInner(path, [](auto &, auto &){}); return canonPathInner<UnixPathTrait>(path, [](auto &, auto &){});
} }
CanonPath::CanonPath(std::string_view raw) CanonPath::CanonPath(std::string_view raw)

View file

@ -10,6 +10,100 @@
namespace nix { namespace nix {
/**
* Unix-style path primitives.
*
* Nix's own "logical" paths are always Unix-style. So this is always
* used for that, and additionally used for native paths on Unix.
*/
struct UnixPathTrait
{
    /* Character and string types in which Unix-style paths are expressed. */
    using CharT = char;
    using String = std::string;
    using StringView = std::string_view;

    /* The single separator character of Unix-style paths. */
    constexpr static char preferredSep = '/';

    /**
     * Whether `c` is a path separator, i.e. equal to `preferredSep`.
     */
    static inline bool isPathSep(char c)
    {
        return preferredSep == c;
    }

    /**
     * Position of the first separator in `path` at index `from` or
     * later, or `StringView::npos` if there is none.
     */
    static inline size_t findPathSep(StringView path, size_t from = 0)
    {
        const size_t firstSep = path.find(preferredSep, from);
        return firstSep;
    }

    /**
     * Position of the last separator in `path` at index `from` or
     * earlier, or `StringView::npos` if there is none. By default the
     * whole of `path` is searched.
     */
    static inline size_t rfindPathSep(StringView path, size_t from = StringView::npos)
    {
        const size_t lastSep = path.rfind(preferredSep, from);
        return lastSep;
    }
};
/**
* Windows-style path primitives.
*
* The character type is a parameter because while windows paths rightly
* work over UTF-16 (*) using `wchar_t`, at the current time we are
* often manipulating them converted to UTF-8 (*) using `char`.
*
* (*): Actually neither is guaranteed to be valid Unicode; both are
* arbitrary non-0 8- or 16-bit units. But for characters with special
* meaning like '/', '\\', ':', etc., we refer to an encoding scheme,
* and also for the sake of UIs that display paths as text.
*/
template<class CharT0>
struct WindowsPathTrait
{
    /* Character and string types the path is expressed in; the
       character type is chosen by the template argument. */
    using CharT = CharT0;
    using String = std::basic_string<CharT>;
    using StringView = std::basic_string_view<CharT>;

    /* Backslash is the preferred separator, but '/' is recognised as a
       separator too (see `isPathSep`). */
    constexpr static CharT preferredSep = '\\';

    /**
     * Whether `c` is one of the two accepted separators, '/' or '\\'.
     */
    static inline bool isPathSep(CharT c)
    {
        return c == '/' || c == preferredSep;
    }

    /**
     * Position of the first separator ('/' or '\\') in `path` at index
     * `from` or later, or `String::npos` if there is none.
     *
     * A single pass over the string: searching for each separator
     * separately would scan to the end of `path` whenever one of the
     * two does not occur, even if the other is found immediately.
     */
    static size_t findPathSep(StringView path, size_t from = 0)
    {
        return path.find_first_of(StringView(allSeps, 2), from);
    }

    /**
     * Position of the last separator ('/' or '\\') in `path` at index
     * `from` or earlier, or `String::npos` if there is none. By default
     * the whole of `path` is searched.
     */
    static size_t rfindPathSep(StringView path, size_t from = String::npos)
    {
        return path.find_last_of(StringView(allSeps, 2), from);
    }

private:
    /* The set of characters accepted as separators, for use with
       `find_first_of` / `find_last_of`. Not NUL-terminated; always
       used together with its length. */
    constexpr static CharT allSeps[2] = {'/', preferredSep};
};
/**
 * The path trait for the platform being compiled for:
 * `WindowsPathTrait<char>` when `_WIN32` is defined, `UnixPathTrait`
 * otherwise.
 *
 * @todo Revisit choice of `char` or `wchar_t` for `WindowsPathTrait`
 * argument.
 */
#ifdef _WIN32
using NativePathTrait = WindowsPathTrait<char>;
#else
using NativePathTrait = UnixPathTrait;
#endif
/** /**
* Core pure path canonicalization algorithm. * Core pure path canonicalization algorithm.
* *
@ -24,25 +118,26 @@ namespace nix {
* This is a chance to modify those two paths in arbitrary way, e.g. if * This is a chance to modify those two paths in arbitrary way, e.g. if
* "result" points to a symlink. * "result" points to a symlink.
*/ */
typename std::string canonPathInner( template<class PathDict>
std::string_view remaining, typename PathDict::String canonPathInner(
typename PathDict::StringView remaining,
auto && hookComponent) auto && hookComponent)
{ {
assert(remaining != ""); assert(remaining != "");
std::string result; typename PathDict::String result;
result.reserve(256); result.reserve(256);
while (true) { while (true) {
/* Skip slashes. */ /* Skip slashes. */
while (!remaining.empty() && remaining[0] == '/') while (!remaining.empty() && PathDict::isPathSep(remaining[0]))
remaining.remove_prefix(1); remaining.remove_prefix(1);
if (remaining.empty()) break; if (remaining.empty()) break;
auto nextComp = ({ auto nextComp = ({
auto nextPathSep = remaining.find('/'); auto nextPathSep = PathDict::findPathSep(remaining);
nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep); nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep);
}); });
@ -53,14 +148,14 @@ typename std::string canonPathInner(
/* If `..', delete the last component. */ /* If `..', delete the last component. */
else if (nextComp == "..") else if (nextComp == "..")
{ {
if (!result.empty()) result.erase(result.rfind('/')); if (!result.empty()) result.erase(PathDict::rfindPathSep(result));
remaining.remove_prefix(2); remaining.remove_prefix(2);
} }
/* Normal component; copy it. */ /* Normal component; copy it. */
else { else {
result += '/'; result += PathDict::preferredSep;
if (const auto slash = remaining.find('/'); slash == result.npos) { if (const auto slash = PathDict::findPathSep(remaining); slash == result.npos) {
result += remaining; result += remaining;
remaining = {}; remaining = {};
} else { } else {
@ -73,7 +168,7 @@ typename std::string canonPathInner(
} }
if (result.empty()) if (result.empty())
result = "/"; result = typename PathDict::String { PathDict::preferredSep };
return result; return result;
} }

View file

@ -22,10 +22,14 @@ namespace fs = std::filesystem;
namespace nix { namespace nix {
/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */ /**
* Treat the string as possibly an absolute path, by inspecting the
* start of it. Return whether it was probably intended to be
* absolute.
*/
static bool isAbsolute(PathView path) static bool isAbsolute(PathView path)
{ {
return !path.empty() && path[0] == '/'; return fs::path { path }.is_absolute();
} }
@ -69,6 +73,9 @@ Path canonPath(PathView path, bool resolveSymlinks)
if (!isAbsolute(path)) if (!isAbsolute(path))
throw Error("not an absolute path: '%1%'", path); throw Error("not an absolute path: '%1%'", path);
// For Windows
auto rootName = fs::path { path }.root_name();
/* This just exists because we cannot set the target of `remaining` /* This just exists because we cannot set the target of `remaining`
(the callback parameter) directly to a newly-constructed string, (the callback parameter) directly to a newly-constructed string,
since it is `std::string_view`. */ since it is `std::string_view`. */
@ -78,7 +85,7 @@ Path canonPath(PathView path, bool resolveSymlinks)
arbitrary (but high) limit to prevent infinite loops. */ arbitrary (but high) limit to prevent infinite loops. */
unsigned int followCount = 0, maxFollow = 1024; unsigned int followCount = 0, maxFollow = 1024;
return canonPathInner( auto ret = canonPathInner<NativePathTrait>(
path, path,
[&followCount, &temp, maxFollow, resolveSymlinks] [&followCount, &temp, maxFollow, resolveSymlinks]
(std::string & result, std::string_view & remaining) { (std::string & result, std::string_view & remaining) {
@ -99,6 +106,10 @@ Path canonPath(PathView path, bool resolveSymlinks)
} }
} }
}); });
if (!rootName.empty())
ret = rootName.string() + std::move(ret);
return ret;
} }

View file

@ -9,6 +9,14 @@
#include <numeric> #include <numeric>
#ifdef _WIN32
# define FS_SEP "\\"
# define FS_ROOT "C:" FS_SEP // Need a mounted one, C drive is likely
#else
# define FS_SEP "/"
# define FS_ROOT FS_SEP
#endif
namespace nix { namespace nix {
/* ----------- tests for util.hh ------------------------------------------------*/ /* ----------- tests for util.hh ------------------------------------------------*/
@ -18,9 +26,9 @@ namespace nix {
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
TEST(absPath, doesntChangeRoot) { TEST(absPath, doesntChangeRoot) {
auto p = absPath("/"); auto p = absPath(FS_ROOT);
ASSERT_EQ(p, "/"); ASSERT_EQ(p, FS_ROOT);
} }
@ -53,11 +61,11 @@ namespace nix {
TEST(absPath, pathIsCanonicalised) { TEST(absPath, pathIsCanonicalised) {
auto path = "/some/path/with/trailing/dot/."; auto path = FS_ROOT "some/path/with/trailing/dot/.";
auto p1 = absPath(path); auto p1 = absPath(path);
auto p2 = absPath(p1); auto p2 = absPath(p1);
ASSERT_EQ(p1, "/some/path/with/trailing/dot"); ASSERT_EQ(p1, FS_ROOT "some" FS_SEP "path" FS_SEP "with" FS_SEP "trailing" FS_SEP "dot");
ASSERT_EQ(p1, p2); ASSERT_EQ(p1, p2);
} }
@ -66,24 +74,24 @@ namespace nix {
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
TEST(canonPath, removesTrailingSlashes) { TEST(canonPath, removesTrailingSlashes) {
auto path = "/this/is/a/path//"; auto path = FS_ROOT "this/is/a/path//";
auto p = canonPath(path); auto p = canonPath(path);
ASSERT_EQ(p, "/this/is/a/path"); ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
} }
TEST(canonPath, removesDots) { TEST(canonPath, removesDots) {
auto path = "/this/./is/a/path/./"; auto path = FS_ROOT "this/./is/a/path/./";
auto p = canonPath(path); auto p = canonPath(path);
ASSERT_EQ(p, "/this/is/a/path"); ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
} }
TEST(canonPath, removesDots2) { TEST(canonPath, removesDots2) {
auto path = "/this/a/../is/a////path/foo/.."; auto path = FS_ROOT "this/a/../is/a////path/foo/..";
auto p = canonPath(path); auto p = canonPath(path);
ASSERT_EQ(p, "/this/is/a/path"); ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
} }
TEST(canonPath, requiresAbsolutePath) { TEST(canonPath, requiresAbsolutePath) {
@ -197,7 +205,7 @@ namespace nix {
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
TEST(pathExists, rootExists) { TEST(pathExists, rootExists) {
ASSERT_TRUE(pathExists("/")); ASSERT_TRUE(pathExists(FS_ROOT));
} }
TEST(pathExists, cwdExists) { TEST(pathExists, cwdExists) {