diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 57b64d607..f2d5f1782 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -3,6 +3,7 @@ #include "util.hh" #include "split.hh" #include "canon-path.hh" +#include "string.hh" namespace nix { @@ -183,4 +184,20 @@ std::string fixGitURL(const std::string & url) } } +// https://www.rfc-editor.org/rfc/rfc3986#section-3.1 +bool isValidSchemeName(std::string_view s) +{ + if (s.empty()) return false; + if (!isASCIIAlpha(s[0])) return false; + for (auto c : s.substr(1)) { + if (isASCIIAlpha(c)) continue; + if (isASCIIDigit(c)) continue; + if (c == '+') continue; + if (c == '-') continue; + if (c == '.') continue; + return false; + } + return true; +} + } diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 833f54678..24806bbff 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -55,4 +55,13 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme); changes absolute paths into file:// URLs. */ std::string fixGitURL(const std::string & url); +/** + * Whether a string is valid as RFC 3986 scheme name. + * Colon `:` is part of the URI; not the scheme name, and therefore rejected. + * See https://www.rfc-editor.org/rfc/rfc3986#section-3.1 + * + * Does not check whether the scheme is understood, as that's context-dependent. + */ +bool isValidSchemeName(std::string_view scheme); + } diff --git a/tests/unit/libutil/url.cc b/tests/unit/libutil/url.cc index a678dad20..09fa4e218 100644 --- a/tests/unit/libutil/url.cc +++ b/tests/unit/libutil/url.cc @@ -344,4 +344,22 @@ namespace nix { ASSERT_EQ(percentDecode(e), s); } +TEST(nix, isValidSchemeName) { + ASSERT_TRUE(isValidSchemeName("http")); + ASSERT_TRUE(isValidSchemeName("https")); + ASSERT_TRUE(isValidSchemeName("file")); + ASSERT_TRUE(isValidSchemeName("file+https")); + ASSERT_TRUE(isValidSchemeName("fi.le")); + ASSERT_TRUE(isValidSchemeName("file-ssh")); + ASSERT_TRUE(isValidSchemeName("file+")); + ASSERT_TRUE(isValidSchemeName("file.")); + ASSERT_TRUE(isValidSchemeName("file1")); + ASSERT_FALSE(isValidSchemeName("file:")); + ASSERT_FALSE(isValidSchemeName("file/")); + ASSERT_FALSE(isValidSchemeName("+file")); + ASSERT_FALSE(isValidSchemeName(".file")); + ASSERT_FALSE(isValidSchemeName("-file")); + ASSERT_FALSE(isValidSchemeName("1file")); +} + }