2020-03-30 17:04:18 +03:00
|
|
|
#include "url.hh"
|
2020-09-21 19:22:45 +03:00
|
|
|
#include "url-parts.hh"
|
2020-03-30 17:04:18 +03:00
|
|
|
#include "util.hh"
|
2020-10-16 02:35:24 +03:00
|
|
|
#include "split.hh"
|
2023-11-14 17:00:21 +02:00
|
|
|
#include "canon-path.hh"
|
2023-12-06 16:14:41 +02:00
|
|
|
#include "string.hh"
|
2020-03-30 17:04:18 +03:00
|
|
|
|
|
|
|
namespace nix {
|
|
|
|
|
|
|
|
std::regex refRegex(refRegexS, std::regex::ECMAScript);
|
2020-05-30 13:29:35 +03:00
|
|
|
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
|
2020-03-30 17:04:18 +03:00
|
|
|
std::regex revRegex(revRegexS, std::regex::ECMAScript);
|
|
|
|
|
|
|
|
/**
 * Split `url` into its components.
 *
 * The URL is matched as a whole against a regular expression assembled
 * from the pattern fragments schemeNameRegex, authorityRegex,
 * absPathRegex, pathRegex and queryRegex. The path and fragment are
 * percent-decoded, and the query string is converted with decodeQuery().
 *
 * When the transport part of the scheme (as reported by
 * parseUrlScheme()) is "file", a non-empty authority is rejected and an
 * empty path is replaced by "/".
 *
 * Throws BadURL if `url` does not match the expression or breaks the
 * file-transport rule above.
 */
ParsedURL parseURL(const std::string & url)
{
    static std::regex uriRegex(
        "((" + schemeNameRegex + "):"
        + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
        + "(?:\\?(" + queryRegex + "))?"
        + "(?:#(" + queryRegex + "))?",
        std::regex::ECMAScript);

    std::smatch parts;
    if (!std::regex_match(url, parts, uriRegex))
        throw BadURL("'%s' is not a valid URL", url);

    // Capture groups:
    //   1 = everything before the query, 2 = scheme, 3 = authority,
    //   4 = path following an authority, 5 = path without an authority,
    //   6 = query, 7 = fragment.
    std::string scheme = parts[2];

    std::optional<std::string> authority;
    if (parts[3].matched)
        authority = parts[3].str();

    std::string path = parts[4].matched ? parts[4] : parts[5];

    const bool transportIsFile = parseUrlScheme(scheme).transport == "file";

    if (transportIsFile && authority && *authority != "")
        throw BadURL("file:// URL '%s' has unexpected authority '%s'",
            url, *authority);

    if (transportIsFile && path.empty())
        path = "/";

    return ParsedURL{
        .url = url,
        .base = parts[1],
        .scheme = scheme,
        .authority = authority,
        .path = percentDecode(path),
        .query = decodeQuery(parts[6]),
        .fragment = percentDecode(std::string(parts[7]))
    };
}
|
|
|
|
|
|
|
|
std::string percentDecode(std::string_view in)
|
|
|
|
{
|
|
|
|
std::string decoded;
|
|
|
|
for (size_t i = 0; i < in.size(); ) {
|
|
|
|
if (in[i] == '%') {
|
|
|
|
if (i + 2 >= in.size())
|
|
|
|
throw BadURL("invalid URI parameter '%s'", in);
|
|
|
|
try {
|
|
|
|
decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
|
|
|
|
i += 3;
|
|
|
|
} catch (...) {
|
|
|
|
throw BadURL("invalid URI parameter '%s'", in);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
decoded += in[i++];
|
|
|
|
}
|
|
|
|
return decoded;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::map<std::string, std::string> decodeQuery(const std::string & query)
|
|
|
|
{
|
|
|
|
std::map<std::string, std::string> result;
|
|
|
|
|
|
|
|
for (auto s : tokenizeString<Strings>(query, "&")) {
|
|
|
|
auto e = s.find('=');
|
|
|
|
if (e != std::string::npos)
|
|
|
|
result.emplace(
|
|
|
|
s.substr(0, e),
|
|
|
|
percentDecode(std::string_view(s).substr(e + 1)));
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2023-02-07 17:44:37 +02:00
|
|
|
// Extra characters (on top of the unreserved set) that are left
// unescaped when percent-encoding query names/values and paths.
static const std::string allowedInQuery{":@/?"};
static const std::string allowedInPath{":@/"};
|
|
|
|
|
|
|
|
/**
 * Percent-encode `s`.
 *
 * ASCII letters, digits, the characters "-._~" and any character
 * listed in `keep` are copied through unchanged. Every other byte is
 * written as '%' followed by two upper-case hexadecimal digits.
 *
 * A NUL byte is escaped as "%00" unless `keep` itself contains a NUL.
 */
std::string percentEncode(std::string_view s, std::string_view keep)
{
    // A string_view lookup is used rather than strchr(): strchr treats
    // the terminating NUL as part of the searched string, so it would
    // report '\0' as "found" and let NUL bytes through unescaped.
    static constexpr std::string_view unreservedPunctuation = "-._~";
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string res;
    res.reserve(s.size());

    for (const char c : s) {
        const bool passThrough =
            (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || unreservedPunctuation.find(c) != std::string_view::npos
            || keep.find(c) != std::string_view::npos;

        if (passThrough) {
            res += c;
        } else {
            // Go through unsigned char so bytes >= 0x80 index correctly.
            const auto byte = static_cast<unsigned char>(c);
            res += '%';
            res += hexDigits[byte >> 4];
            res += hexDigits[byte & 0x0F];
        }
    }

    return res;
}
|
|
|
|
|
|
|
|
std::string encodeQuery(const std::map<std::string, std::string> & ss)
|
|
|
|
{
|
|
|
|
std::string res;
|
|
|
|
bool first = true;
|
|
|
|
for (auto & [name, value] : ss) {
|
|
|
|
if (!first) res += '&';
|
|
|
|
first = false;
|
2023-02-07 17:44:37 +02:00
|
|
|
res += percentEncode(name, allowedInQuery);
|
2020-03-30 17:04:18 +03:00
|
|
|
res += '=';
|
2023-02-07 17:44:37 +02:00
|
|
|
res += percentEncode(value, allowedInQuery);
|
2020-03-30 17:04:18 +03:00
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string ParsedURL::to_string() const
|
|
|
|
{
|
|
|
|
return
|
|
|
|
scheme
|
|
|
|
+ ":"
|
|
|
|
+ (authority ? "//" + *authority : "")
|
2023-02-07 17:44:37 +02:00
|
|
|
+ percentEncode(path, allowedInPath)
|
2020-03-30 17:04:18 +03:00
|
|
|
+ (query.empty() ? "" : "?" + encodeQuery(query))
|
|
|
|
+ (fragment.empty() ? "" : "#" + percentEncode(fragment));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ParsedURL::operator ==(const ParsedURL & other) const
|
|
|
|
{
|
|
|
|
return
|
|
|
|
scheme == other.scheme
|
|
|
|
&& authority == other.authority
|
|
|
|
&& path == other.path
|
|
|
|
&& query == other.query
|
|
|
|
&& fragment == other.fragment;
|
|
|
|
}
|
|
|
|
|
2023-11-14 17:00:21 +02:00
|
|
|
/**
 * Return a copy of this URL whose path has been passed through
 * CanonPath and converted back with abs(). All other fields are copied
 * unchanged; the object itself is not modified.
 */
ParsedURL ParsedURL::canonicalise()
{
    ParsedURL canonical = *this;
    canonical.path = CanonPath(path).abs();
    return canonical;
}
|
|
|
|
|
2020-10-16 02:35:24 +03:00
|
|
|
/**
|
|
|
|
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
|
|
|
|
* into a tuple '(applicationScheme, transportScheme)'
|
|
|
|
*
|
|
|
|
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
|
|
|
|
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
|
|
|
|
*/
|
|
|
|
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
|
|
|
|
{
|
|
|
|
auto application = splitPrefixTo(scheme, '+');
|
|
|
|
auto transport = scheme;
|
|
|
|
return ParsedUrlScheme {
|
|
|
|
.application = application,
|
|
|
|
.transport = transport,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2023-09-28 17:52:28 +03:00
|
|
|
std::string fixGitURL(const std::string & url)
|
|
|
|
{
|
|
|
|
std::regex scpRegex("([^/]*)@(.*):(.*)");
|
|
|
|
if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex))
|
|
|
|
return std::regex_replace(url, scpRegex, "ssh://$1@$2/$3");
|
|
|
|
else {
|
|
|
|
if (url.find("://") == std::string::npos) {
|
|
|
|
return (ParsedURL {
|
|
|
|
.scheme = "file",
|
|
|
|
.authority = "",
|
|
|
|
.path = url
|
|
|
|
}).to_string();
|
|
|
|
} else
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-06 16:14:41 +02:00
|
|
|
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
|
|
|
|
bool isValidSchemeName(std::string_view s)
|
|
|
|
{
|
|
|
|
if (s.empty()) return false;
|
|
|
|
if (!isASCIIAlpha(s[0])) return false;
|
|
|
|
for (auto c : s.substr(1)) {
|
|
|
|
if (isASCIIAlpha(c)) continue;
|
|
|
|
if (isASCIIDigit(c)) continue;
|
|
|
|
if (c == '+') continue;
|
|
|
|
if (c == '-') continue;
|
|
|
|
if (c == '.') continue;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-30 17:04:18 +03:00
|
|
|
}
|