From 20ff61ab252fc1d2bd69987f51a000739b24c670 Mon Sep 17 00:00:00 2001
From: Robert Hensing
Date: Fri, 12 May 2023 19:46:37 +0200
Subject: [PATCH] nix: Reserve shebang line syntax and only parse double backtick quotes

Being restrictive about syntax leaves opportunity to improve the syntax and functionality later.
---
 doc/manual/src/release-notes/rl-next.md |  11 +-
 src/libutil/args.cc                     | 169 +++++++++++++++++++++++-
 src/libutil/util.cc                     |  43 -------
 src/libutil/util.hh                     |   5 -
 src/nix/shell.md                        |   8 +-
 tests/functional/flakes/flakes.sh       |  16 ++-
 6 files changed, 194 insertions(+), 58 deletions(-)

diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md
index 93d4f432b..4bff3c685 100644
--- a/doc/manual/src/release-notes/rl-next.md
+++ b/doc/manual/src/release-notes/rl-next.md
@@ -2,10 +2,13 @@
 
 - The experimental nix command is now a `#!-interpreter` by appending the
   contents of any `#! nix` lines and the script's location to a single call.
+
+  Verbatim strings may be passed in double backtick (```` `` ````) quotes.
+
   Some examples:
   ```
   #!/usr/bin/env nix
-  #! nix shell --file "<nixpkgs>" hello --command bash
+  #! nix shell --file ``<nixpkgs>`` hello --command bash
 
   hello | cowsay
   ```
@@ -19,8 +22,10 @@
   or
   ```bash
   #! /usr/bin/env nix
-  #! nix shell --impure --expr
-  #! nix "with (import (builtins.getFlake ''nixpkgs'') {}); terraform.withPlugins (plugins: [ plugins.openstack ])"
+  #! nix shell --impure --expr ``
+  #! nix with (import (builtins.getFlake "nixpkgs") {});
+  #! nix terraform.withPlugins (plugins: [ plugins.openstack ])
+  #! nix ``
   #! nix --command bash
 
   terraform "$@"
diff --git a/src/libutil/args.cc b/src/libutil/args.cc
index 481ed33ff..ab6e0e266 100644
--- a/src/libutil/args.cc
+++ b/src/libutil/args.cc
@@ -86,6 +86,164 @@ void RootArgs::parseCmdline(const Strings & _cmdline)
     Args::parseCmdline(_cmdline, false);
 }
 
+/**
+ * Basically this is `typedef std::optional<Parser> Parser(std::string_view s, Strings & r);`
+ *
+ * Except we can't recursively reference the Parser typedef, so we have to write a class.
+ */
+struct Parser {
+    std::string_view remaining;
+
+    /**
+     * @brief Parse the next character(s)
+     *
+     * @param state replaced with the next parser state; set to nullptr when parsing is done
+     * @param r list that completed arguments are appended to
+     */
+    virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) = 0;
+
+    Parser(std::string_view s) : remaining(s) {};
+
+    virtual ~Parser() = default;
+};
+
+struct ParseQuoted : public Parser {
+    /**
+     * @brief Accumulated string
+     *
+     * Parsed argument up to this point.
+     */
+    std::string acc;
+
+    ParseQuoted(std::string_view s) : Parser(s) {};
+
+    virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) override;
+};
+
+
+struct ParseUnquoted : public Parser {
+    /**
+     * @brief Accumulated string
+     *
+     * Parsed argument up to this point. Empty string is not representable in
+     * unquoted syntax, so we use it for the initial state.
+     */
+    std::string acc;
+
+    ParseUnquoted(std::string_view s) : Parser(s) {};
+
+    virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) override {
+        if (remaining.empty()) {
+            if (!acc.empty())
+                r.push_back(acc);
+            state = nullptr; // done
+            return;
+        }
+        switch (remaining[0]) {
+            case ' ': case '\t': case '\n': case '\r':
+                if (!acc.empty())
+                    r.push_back(acc);
+                state = std::make_shared<ParseUnquoted>(ParseUnquoted(remaining.substr(1)));
+                return;
+            case '`':
+                if (remaining.size() > 1 && remaining[1] == '`') {
+                    // A quote may only start a new argument. Gluing it onto an unquoted
+                    // word is reserved; reject it rather than silently dropping the word.
+                    if (!acc.empty())
+                        throw Error("double backtick quote cannot directly follow an unquoted nix shebang argument. Separate them with whitespace?");
+                    state = std::make_shared<ParseQuoted>(ParseQuoted(remaining.substr(2)));
+                    return;
+                }
+                else
+                    throw Error("single backtick is not a supported syntax in the nix shebang.");
+
+            // reserved characters
+            // meaning to be determined, or may be reserved indefinitely so that
+            // #!nix syntax looks unambiguous
+            case '$':
+            case '*':
+            case '~':
+            case '<':
+            case '>':
+            case '|':
+            case ';':
+            case '(':
+            case ')':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '\'':
+            case '"':
+            case '\\':
+                throw Error("unsupported unquoted character in nix shebang: " + std::string(1, remaining[0]) + ". Use double backticks to escape?");
+
+            case '#':
+                if (acc.empty()) {
+                    throw Error ("unquoted nix shebang argument cannot start with #. Use double backticks to escape?");
+                } else {
+                    acc += remaining[0];
+                    remaining = remaining.substr(1);
+                    return;
+                }
+
+            default:
+                acc += remaining[0];
+                remaining = remaining.substr(1);
+                return;
+        }
+        assert(false);
+    }
+};
+
+/**
+ * Consume the next character of a double backtick quoted string. A lone
+ * backtick is kept literally; a double backtick ends the argument, which is
+ * emitted even when it is empty. Running out of input first is an error.
+ */
+void ParseQuoted::operator()(std::shared_ptr<Parser> &state, Strings & r) {
+    if (remaining.empty()) {
+        throw Error("unterminated quoted string in nix shebang");
+    }
+    switch (remaining[0]) {
+        case '`':
+            if (remaining.size() > 1 && remaining[1] == '`') {
+                state = std::make_shared<ParseUnquoted>(ParseUnquoted(remaining.substr(2)));
+                r.push_back(acc);
+                return;
+            }
+            else {
+                acc += remaining[0];
+                remaining = remaining.substr(1);
+                return;
+            }
+        default:
+            acc += remaining[0];
+            remaining = remaining.substr(1);
+            return;
+    }
+    assert(false);
+}
+
+/**
+ * Split the contents of the `#! nix` lines into arguments.
+ *
+ * Unquoted words are separated by whitespace; text between double backticks
+ * is taken verbatim as a single argument.
+ */
+static Strings parseShebangContent(std::string_view s) {
+    Strings result;
+    std::shared_ptr<Parser> parserState(std::make_shared<ParseUnquoted>(ParseUnquoted(s)));
+
+    // trampoline == iterated strategy pattern
+    while (parserState) {
+        auto currentState = parserState;
+        (*currentState)(parserState, result);
+    }
+
+    return result;
+}
+
 void Args::parseCmdline(const Strings & _cmdline, bool allowShebang)
 {
     Strings pendingArgs;
@@ -121,13 +279,18 @@ void Args::parseCmdline(const Strings & _cmdline, bool allowShebang)
                 std::string line;
                 std::getline(stream,line);
                 static const std::string commentChars("#/\\%@*-");
+                std::string shebangContent;
                 while (std::getline(stream,line) && !line.empty() && commentChars.find(line[0]) != std::string::npos){
                     line = chomp(line);
 
                     std::smatch match;
-                    if (std::regex_match(line, match, std::regex("^#!\\s*nix\\s(.*)$")))
-                        for (const auto & word : shellwords(match[1].str()))
-                            cmdline.push_back(word);
+                    // We match one space after `nix` so that we preserve indentation.
+                    // No space is necessary for an empty line. An empty line has basically no effect.
+                    if (std::regex_match(line, match, std::regex("^#!\\s*nix(?: |$)(.*)$")))
+                        shebangContent += match[1].str() + "\n";
+                }
+                for (const auto & word : parseShebangContent(shebangContent)) {
+                    cmdline.push_back(word);
                 }
                 cmdline.push_back(script);
                 for (auto pos = savedArgs.begin(); pos != savedArgs.end();pos++)
diff --git a/src/libutil/util.cc b/src/libutil/util.cc
index 6ca1dbd7a..5bb3f374b 100644
--- a/src/libutil/util.cc
+++ b/src/libutil/util.cc
@@ -138,49 +138,6 @@ std::string shellEscape(const std::string_view s)
     return r;
 }
 
-/* Recreate the effect of the perl shellwords function, breaking up a
- * string into arguments like a shell word, including escapes
- */
-std::vector<std::string> shellwords2(const std::string & s)
-{
-    std::regex whitespace("^(\\s+).*");
-    auto begin = s.cbegin();
-    std::vector<std::string> res;
-    std::string cur;
-    enum state {
-        sBegin,
-        sQuote
-    };
-    state st = sBegin;
-    auto it = begin;
-    for (; it != s.cend(); ++it) {
-        if (st == sBegin) {
-            std::smatch match;
-            if (regex_search(it, s.cend(), match, whitespace)) {
-                cur.append(begin, it);
-                res.push_back(cur);
-                cur.clear();
-                it = match[1].second;
-                begin = it;
-            }
-        }
-        switch (*it) {
-            case '"':
-                cur.append(begin, it);
-                begin = it + 1;
-                st = st == sBegin ? sQuote : sBegin;
-                break;
-            case '\\':
-                /* perl shellwords mostly just treats the next char as part of the string with no special processing */
-                cur.append(begin, it);
-                begin = ++it;
-                break;
-        }
-    }
-    cur.append(begin, it);
-    if (!cur.empty()) res.push_back(cur);
-    return res;
-}
 
 void ignoreException(Verbosity lvl)
 {
diff --git a/src/libutil/util.hh b/src/libutil/util.hh
index b7d3ac504..27faa4d6d 100644
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@@ -189,11 +189,6 @@ std::string toLower(const std::string & s);
 std::string shellEscape(const std::string_view s);
 
 
-/* Recreate the effect of the perl shellwords function, breaking up a
-   string into arguments like a shell word, including escapes. */
-std::vector<std::string> shellwords2(const std::string & s);
-
-
 /* Exception handling in destructors: print an error message, then
    ignore the exception. */
 void ignoreException(Verbosity lvl = lvlError);
diff --git a/src/nix/shell.md b/src/nix/shell.md
index 7e0e5f213..7c315fb3f 100644
--- a/src/nix/shell.md
+++ b/src/nix/shell.md
@@ -109,8 +109,10 @@ package like Terraform:
 
 ```bash
 #! /usr/bin/env nix
-#! nix shell --impure --expr
-#! nix "with (import (builtins.getFlake ''nixpkgs'') {}); terraform.withPlugins (plugins: [ plugins.openstack ])"
+#! nix shell --impure --expr ``
+#! nix with (import (builtins.getFlake ''nixpkgs'') {});
+#! nix terraform.withPlugins (plugins: [ plugins.openstack ])
+#! nix ``
 #! nix --command bash
 
 terraform "$@"
@@ -118,7 +120,7 @@ terraform "$@"
 
 > **Note**
 >
-> You must use double quotes (`"`) when passing a simple Nix expression
+> You must use double backticks (```` `` ````) when passing a simple Nix expression
 > in a nix shell shebang.
 
 Finally, using the merging of multiple nix shell shebangs the following
diff --git a/tests/functional/flakes/flakes.sh b/tests/functional/flakes/flakes.sh
index 28b5e4e0f..a0a34ffa9 100644
--- a/tests/functional/flakes/flakes.sh
+++ b/tests/functional/flakes/flakes.sh
@@ -80,6 +80,7 @@ chmod +x "$nonFlakeDir/shebang.sh"
 git -C "$nonFlakeDir" add README.md shebang.sh
 git -C "$nonFlakeDir" commit -m 'Initial'
 
+# this also tests a fairly trivial double backtick quoted string, ``--command``
 cat > $nonFlakeDir/shebang-comments.sh < $nonFlakeDir/shebang-comments.sh < $nonFlakeDir/shebang-reject.sh <&1 | grepQuiet -F 'error: unsupported unquoted character in nix shebang: *. Use double backticks to escape?'