Allow special characters in flake paths

Support using Nix flakes in paths with spaces or arbitrary Unicode characters.
This introduces the convention that the path part of the URL should be
percent-encoded when dealing with `path:` URLs and not when using
filepaths (following the convention of Firefox).

Co-authored-by: Rendal <rasmus@rend.al>
This commit is contained in:
Théophane Hufschmitt 2023-05-31 10:36:43 +02:00
parent d8cebae939
commit 50e61f579c
3 changed files with 57 additions and 39 deletions

View file

@ -77,14 +77,6 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
{
using namespace fetchers;
static std::string fnRegex = "[0-9a-zA-Z-._~!$&'\"()*+,;=]+";
static std::regex pathUrlRegex(
"(/?" + fnRegex + "(?:/" + fnRegex + ")*/?)"
+ "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);
static std::regex flakeRegex(
"((" + flakeIdRegexS + ")(?:/(?:" + refAndOrRevRegex + "))?)"
+ "(?:#(" + queryRegex + "))?",
@ -92,26 +84,23 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
std::smatch match;
/* Check if 'url' is a flake ID. This is an abbreviated syntax for
'flake:<flake-id>?ref=<ref>&rev=<rev>'. */
if (std::regex_match(url, match, flakeRegex)) {
auto parsedURL = ParsedURL{
.url = url,
.base = "flake:" + match.str(1),
.scheme = "flake",
.authority = "",
.path = match[1],
};
return std::make_pair(
FlakeRef(Input::fromURL(parsedURL, isFlake), ""),
percentDecode(match.str(6)));
auto parsePathFlakeRef = [&]() {
std::string path = url;
std::string fragment = "";
std::map<std::string, std::string> query = {};
auto pathEnd = url.find_first_of("#?");
auto fragmentStart = pathEnd;
if (pathEnd != std::string::npos && url[pathEnd] == '?')
fragmentStart = url.find("#");
if (pathEnd != std::string::npos) {
path = url.substr(0, pathEnd);
}
if (fragmentStart != std::string::npos) {
fragment = percentDecode(url.substr(fragmentStart+1));
}
if (fragmentStart != std::string::npos && pathEnd != std::string::npos) {
query = decodeQuery(url.substr(pathEnd+1, fragmentStart));
}
else if (std::regex_match(url, match, pathUrlRegex)) {
std::string path = match[1];
std::string fragment = percentDecode(match.str(3));
if (baseDir) {
/* Check if 'url' is a path (either absolute or relative
@ -163,7 +152,7 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
.scheme = "git+file",
.authority = "",
.path = flakeRoot,
.query = decodeQuery(match[2]),
.query = query,
};
if (subdir != "") {
@ -188,7 +177,6 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
} else {
if (!hasPrefix(path, "/"))
throw BadURL("flake reference '%s' is not an absolute path", url);
auto query = decodeQuery(match[2]);
path = canonPath(path + "/" + getOr(query, "dir", ""));
}
@ -197,9 +185,27 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
attrs.insert_or_assign("path", path);
return std::make_pair(FlakeRef(Input::fromAttrs(std::move(attrs)), ""), fragment);
};
/* Check if 'url' is a flake ID. This is an abbreviated syntax for
'flake:<flake-id>?ref=<ref>&rev=<rev>'. */
if (std::regex_match(url, match, flakeRegex)) {
auto parsedURL = ParsedURL{
.url = url,
.base = "flake:" + match.str(1),
.scheme = "flake",
.authority = "",
.path = match[1],
};
return std::make_pair(
FlakeRef(Input::fromURL(parsedURL), ""),
percentDecode(match.str(6)));
}
else {
try {
auto parsedURL = parseURL(url);
std::string fragment;
std::swap(fragment, parsedURL.fragment);
@ -210,6 +216,9 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
return std::make_pair(
FlakeRef(std::move(input), getOr(parsedURL.query, "dir", "")),
fragment);
} catch (BadURL &) {
return parsePathFlakeRef();
}
}
}

View file

@ -335,4 +335,13 @@ namespace nix {
ASSERT_EQ(d, s);
}
TEST(percentEncode, yen) {
    // Round-trips a multi-byte UTF-8 character through percentEncode and
    // percentDecode, using the example from
    // https://en.wikipedia.org/wiki/Percent-encoding#Character_data
    //
    // U+5186 ("円") is the three UTF-8 bytes E5 86 86, so every byte has its
    // high bit set and must be emitted as exactly two hex digits.
    // The cast lets the u8 literal initialise a std::string even where u8
    // literals have type const char8_t[] (C++20 and later).
    std::string s = reinterpret_cast<const char*>(u8"円");
    std::string e = "%E5%86%86";

    ASSERT_EQ(percentEncode(s), e);
    ASSERT_EQ(percentDecode(e), s);
}
}

View file

@ -103,7 +103,7 @@ std::string percentEncode(std::string_view s, std::string_view keep)
|| keep.find(c) != std::string::npos)
res += c;
else
res += fmt("%%%02X", (unsigned int) c);
res += fmt("%%%02X", c & 0xFF);
return res;
}