mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2024-11-26 07:46:21 +02:00
8594f3cd5a
Previously, the "file:./" prefix was not correctly recognized in fixGitURL; instead, it was mistaken for a file path, which resulted in a parsed URL of the form "file://file:./". This commit fixes the issue by properly detecting the "file:" prefix. Note, however, that unlike "file://", the "file:./" URI is _not_ standardized, but it has been widely used to refer to relative file paths. In particular, "git+file:./" did work for nix<=2.18, and has been broken since nix 2.19.0. Finally, this commit fixes the issue completely for the 2.19 series, but is still inadequate for the 2.20 series due to new behaviors from the switch to libgit2. However, it does improve the correctness of parsing even though it is not yet a complete solution.
194 lines
5.4 KiB
C++
194 lines
5.4 KiB
C++
#include "url.hh"
|
|
#include "url-parts.hh"
|
|
#include "util.hh"
|
|
#include "split.hh"
|
|
#include "canon-path.hh"
|
|
|
|
namespace nix {
|
|
|
|
std::regex refRegex(refRegexS, std::regex::ECMAScript);
|
|
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
|
|
std::regex revRegex(revRegexS, std::regex::ECMAScript);
|
|
|
|
/**
 * Split `url` into its components and return them as a ParsedURL.
 *
 * The whole string must match
 *   scheme ":" ( "//" authority abs-path | ["/"] path ) [ "?" query ] [ "#" fragment ]
 * where the individual sub-patterns come from the regex strings declared
 * outside this file.
 *
 * The path and fragment are percent-decoded and the query is turned into a
 * key/value map via decodeQuery().
 *
 * @throws BadURL if `url` does not match the grammar above, or if the
 *         transport part of the scheme is "file" and a non-empty authority
 *         is present.
 */
ParsedURL parseURL(const std::string & url)
{
    // Capture groups: 1 = everything before '?'/'#', 2 = scheme,
    // 3 = authority, 4 = path following an authority, 5 = path without
    // authority, 6 = query, 7 = fragment.
    static std::regex uriRegex(
        "((" + schemeNameRegex + "):"
        + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
        + "(?:\\?(" + queryRegex + "))?"
        + "(?:#(" + fragmentRegex + "))?",
        std::regex::ECMAScript);

    std::smatch groups;
    if (!std::regex_match(url, groups, uriRegex))
        throw BadURL("'%s' is not a valid URL", url);

    std::string scheme = groups[2].str();

    // Distinguish "no authority at all" from "empty authority" (e.g. file:///x).
    std::optional<std::string> authority;
    if (groups[3].matched)
        authority = groups[3].str();

    std::string path = groups[4].matched ? groups[4].str() : groups[5].str();

    const bool transportIsFile = parseUrlScheme(scheme).transport == "file";

    if (transportIsFile && authority && *authority != "")
        throw BadURL("file:// URL '%s' has unexpected authority '%s'",
            url, *authority);

    // A file URL with no path refers to the root directory.
    if (transportIsFile && path.empty())
        path = "/";

    return ParsedURL{
        .url = url,
        .base = groups[1].str(),
        .scheme = scheme,
        .authority = authority,
        .path = percentDecode(path),
        .query = decodeQuery(groups[6].str()),
        .fragment = percentDecode(groups[7].str())
    };
}
|
|
|
|
std::string percentDecode(std::string_view in)
|
|
{
|
|
std::string decoded;
|
|
for (size_t i = 0; i < in.size(); ) {
|
|
if (in[i] == '%') {
|
|
if (i + 2 >= in.size())
|
|
throw BadURL("invalid URI parameter '%s'", in);
|
|
try {
|
|
decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
|
|
i += 3;
|
|
} catch (...) {
|
|
throw BadURL("invalid URI parameter '%s'", in);
|
|
}
|
|
} else
|
|
decoded += in[i++];
|
|
}
|
|
return decoded;
|
|
}
|
|
|
|
std::map<std::string, std::string> decodeQuery(const std::string & query)
|
|
{
|
|
std::map<std::string, std::string> result;
|
|
|
|
for (auto s : tokenizeString<Strings>(query, "&")) {
|
|
auto e = s.find('=');
|
|
if (e != std::string::npos)
|
|
result.emplace(
|
|
s.substr(0, e),
|
|
percentDecode(std::string_view(s).substr(e + 1)));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// Characters, beyond the unreserved set, that percentEncode() is told to
// leave unescaped when encoding a query component ...
static const std::string allowedInQuery = ":@/?";
// ... and when encoding a path.
static const std::string allowedInPath = ":@/";
|
|
|
|
/**
 * Percent-encode `s`.
 *
 * ASCII letters, digits and the characters "-._~" are copied through
 * unchanged, as is any character that appears in `keep`. Every other byte
 * is written as '%' followed by two upper-case hexadecimal digits.
 *
 * @param s     The raw bytes to encode.
 * @param keep  Additional characters that must not be escaped.
 * @return The encoded string.
 */
std::string percentEncode(std::string_view s, std::string_view keep)
{
    // A string_view (rather than strchr on a C string) so that the
    // terminating NUL is not part of the searched set: strchr("-._~", '\0')
    // returns non-null, which used to let NUL bytes through unescaped.
    static constexpr std::string_view unreservedPunct = "-._~";
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string res;
    res.reserve(s.size());

    for (const char c : s) {
        const bool unreserved =
            (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || unreservedPunct.find(c) != std::string_view::npos;

        if (unreserved || keep.find(c) != std::string_view::npos) {
            res += c;
        } else {
            // Go through unsigned char so bytes >= 0x80 index correctly.
            const auto byte = static_cast<unsigned char>(c);
            res += '%';
            res += hexDigits[byte >> 4];
            res += hexDigits[byte & 0x0F];
        }
    }

    return res;
}
|
|
|
|
std::string encodeQuery(const std::map<std::string, std::string> & ss)
|
|
{
|
|
std::string res;
|
|
bool first = true;
|
|
for (auto & [name, value] : ss) {
|
|
if (!first) res += '&';
|
|
first = false;
|
|
res += percentEncode(name, allowedInQuery);
|
|
res += '=';
|
|
res += percentEncode(value, allowedInQuery);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
std::string ParsedURL::to_string() const
|
|
{
|
|
return
|
|
scheme
|
|
+ ":"
|
|
+ (authority ? "//" + *authority : "")
|
|
+ percentEncode(path, allowedInPath)
|
|
+ (query.empty() ? "" : "?" + encodeQuery(query))
|
|
+ (fragment.empty() ? "" : "#" + percentEncode(fragment));
|
|
}
|
|
|
|
bool ParsedURL::operator ==(const ParsedURL & other) const
|
|
{
|
|
return
|
|
scheme == other.scheme
|
|
&& authority == other.authority
|
|
&& path == other.path
|
|
&& query == other.query
|
|
&& fragment == other.fragment;
|
|
}
|
|
|
|
/**
 * Return a copy of this URL whose path has been passed through
 * CanonPath(...).abs(); every other member is copied unchanged and `*this`
 * is left untouched.
 */
ParsedURL ParsedURL::canonicalise()
{
    auto canonical = *this;
    const CanonPath normalisedPath(canonical.path);
    canonical.path = normalisedPath.abs();
    return canonical;
}
|
|
|
|
/**
|
|
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
|
|
* into a tuple '(applicationScheme, transportScheme)'
|
|
*
|
|
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
|
|
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
|
|
*/
|
|
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
|
|
{
|
|
auto application = splitPrefixTo(scheme, '+');
|
|
auto transport = scheme;
|
|
return ParsedUrlScheme {
|
|
.application = application,
|
|
.transport = transport,
|
|
};
|
|
}
|
|
|
|
std::string fixGitURL(const std::string & url)
|
|
{
|
|
std::regex scpRegex("([^/]*)@(.*):(.*)");
|
|
if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex))
|
|
return std::regex_replace(url, scpRegex, "ssh://$1@$2/$3");
|
|
if (hasPrefix(url, "file:"))
|
|
return url;
|
|
if (url.find("://") == std::string::npos) {
|
|
return (ParsedURL {
|
|
.scheme = "file",
|
|
.authority = "",
|
|
.path = url
|
|
}).to_string();
|
|
}
|
|
return url;
|
|
}
|
|
|
|
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
|
|
bool isValidSchemeName(std::string_view s)
|
|
{
|
|
static std::regex regex(schemeNameRegex, std::regex::ECMAScript);
|
|
|
|
return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default);
|
|
}
|
|
|
|
}
|