nix: Reserve shebang line syntax and only parse double backtick quotes

Being restrictive about syntax leaves opportunity to improve the
syntax and functionality later.
This commit is contained in:
Robert Hensing 2023-05-12 19:46:37 +02:00 committed by tomberek
parent cc68ed8ff7
commit 20ff61ab25
6 changed files with 177 additions and 58 deletions

View File

@ -2,10 +2,13 @@
- The experimental nix command is now a `#!-interpreter` by appending the
contents of any `#! nix` lines and the script's location to a single call.
Verbatim strings may be passed in double backtick (```` `` ````) quotes.
Some examples:
```
#!/usr/bin/env nix
#! nix shell --file "<nixpkgs>" hello --command bash
#! nix shell --file ``<nixpkgs>`` hello --command bash
hello | cowsay
```
@ -19,8 +22,10 @@
or
```bash
#! /usr/bin/env nix
#! nix shell --impure --expr
#! nix "with (import (builtins.getFlake ''nixpkgs'') {}); terraform.withPlugins (plugins: [ plugins.openstack ])"
#! nix shell --impure --expr ``
#! nix with (import (builtins.getFlake "nixpkgs") {});
#! nix terraform.withPlugins (plugins: [ plugins.openstack ])
#! nix ``
#! nix --command bash
terraform "$@"

View File

@ -86,6 +86,147 @@ void RootArgs::parseCmdline(const Strings & _cmdline)
Args::parseCmdline(_cmdline, false);
}
/**
* Basically this is `typedef std::optional<Parser> Parser(std::string_view s, Strings & r);`
*
* Except we can't recursively reference the Parser typedef, so we have to write a class.
*/
struct Parser {
std::string_view remaining;
/**
* @brief Parse the next character(s)
*
* @param r
* @return std::shared_ptr<Parser>
*/
virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) = 0;
Parser(std::string_view s) : remaining(s) {};
};
struct ParseQuoted : public Parser {
/**
* @brief Accumulated string
*
* Parsed argument up to this point.
*/
std::string acc;
ParseQuoted(std::string_view s) : Parser(s) {};
virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) override;
};
struct ParseUnquoted : public Parser {
/**
* @brief Accumulated string
*
* Parsed argument up to this point. Empty string is not representable in
* unquoted syntax, so we use it for the initial state.
*/
std::string acc;
ParseUnquoted(std::string_view s) : Parser(s) {};
virtual void operator()(std::shared_ptr<Parser> & state, Strings & r) override {
if (remaining.empty()) {
if (!acc.empty())
r.push_back(acc);
state = nullptr; // done
return;
}
switch (remaining[0]) {
case ' ': case '\t': case '\n': case '\r':
if (!acc.empty())
r.push_back(acc);
state = std::make_shared<ParseUnquoted>(ParseUnquoted(remaining.substr(1)));
return;
case '`':
if (remaining.size() > 1 && remaining[1] == '`') {
state = std::make_shared<ParseQuoted>(ParseQuoted(remaining.substr(2)));
return;
}
else
throw Error("single backtick is not a supported syntax in the nix shebang.");
// reserved characters
// meaning to be determined, or may be reserved indefinitely so that
// #!nix syntax looks unambiguous
case '$':
case '*':
case '~':
case '<':
case '>':
case '|':
case ';':
case '(':
case ')':
case '[':
case ']':
case '{':
case '}':
case '\'':
case '"':
case '\\':
throw Error("unsupported unquoted character in nix shebang: " + std::string(1, remaining[0]) + ". Use double backticks to escape?");
case '#':
if (acc.empty()) {
throw Error ("unquoted nix shebang argument cannot start with #. Use double backticks to escape?");
} else {
acc += remaining[0];
remaining = remaining.substr(1);
return;
}
default:
acc += remaining[0];
remaining = remaining.substr(1);
return;
}
assert(false);
}
};
void ParseQuoted::operator()(std::shared_ptr<Parser> &state, Strings & r) {
if (remaining.empty()) {
throw Error("unterminated quoted string in nix shebang");
}
switch (remaining[0]) {
case '`':
if (remaining.size() > 1 && remaining[1] == '`') {
state = std::make_shared<ParseUnquoted>(ParseUnquoted(remaining.substr(2)));
r.push_back(acc);
return;
}
else {
acc += remaining[0];
remaining = remaining.substr(1);
return;
}
default:
acc += remaining[0];
remaining = remaining.substr(1);
return;
}
assert(false);
}
static Strings parseShebangContent(std::string_view s) {
Strings result;
std::shared_ptr<Parser> parserState(std::make_shared<ParseUnquoted>(ParseUnquoted(s)));
// trampoline == iterated strategy pattern
while (parserState) {
auto currentState = parserState;
(*currentState)(parserState, result);
}
return result;
}
void Args::parseCmdline(const Strings & _cmdline, bool allowShebang)
{
Strings pendingArgs;
@ -121,13 +262,18 @@ void Args::parseCmdline(const Strings & _cmdline, bool allowShebang)
std::string line;
std::getline(stream,line);
static const std::string commentChars("#/\\%@*-");
std::string shebangContent;
while (std::getline(stream,line) && !line.empty() && commentChars.find(line[0]) != std::string::npos){
line = chomp(line);
std::smatch match;
if (std::regex_match(line, match, std::regex("^#!\\s*nix\\s(.*)$")))
for (const auto & word : shellwords(match[1].str()))
cmdline.push_back(word);
// We match one space after `nix` so that we preserve indentation.
// No space is necessary for an empty line. An empty line has basically no effect.
if (std::regex_match(line, match, std::regex("^#!\\s*nix(:? |$)(.*)$")))
shebangContent += match[2].str() + "\n";
}
for (const auto & word : parseShebangContent(shebangContent)) {
cmdline.push_back(word);
}
cmdline.push_back(script);
for (auto pos = savedArgs.begin(); pos != savedArgs.end();pos++)

View File

@ -138,49 +138,6 @@ std::string shellEscape(const std::string_view s)
return r;
}
/* Recreate the effect of the perl shellwords function, breaking up a
* string into arguments like a shell word, including escapes
*/
std::vector<std::string> shellwords2(const std::string & s)
{
std::regex whitespace("^(\\s+).*");
auto begin = s.cbegin();
std::vector<std::string> res;
std::string cur;
enum state {
sBegin,
sQuote
};
state st = sBegin;
auto it = begin;
for (; it != s.cend(); ++it) {
if (st == sBegin) {
std::smatch match;
if (regex_search(it, s.cend(), match, whitespace)) {
cur.append(begin, it);
res.push_back(cur);
cur.clear();
it = match[1].second;
begin = it;
}
}
switch (*it) {
case '"':
cur.append(begin, it);
begin = it + 1;
st = st == sBegin ? sQuote : sBegin;
break;
case '\\':
/* perl shellwords mostly just treats the next char as part of the string with no special processing */
cur.append(begin, it);
begin = ++it;
break;
}
}
cur.append(begin, it);
if (!cur.empty()) res.push_back(cur);
return res;
}
void ignoreException(Verbosity lvl)
{

View File

@ -189,11 +189,6 @@ std::string toLower(const std::string & s);
std::string shellEscape(const std::string_view s);
/* Recreate the effect of the perl shellwords function, breaking up a
string into arguments like a shell word, including escapes. */
std::vector<std::string> shellwords2(const std::string & s);
/* Exception handling in destructors: print an error message, then
ignore the exception. */
void ignoreException(Verbosity lvl = lvlError);

View File

@ -109,8 +109,10 @@ package like Terraform:
```bash
#! /usr/bin/env nix
#! nix shell --impure --expr
#! nix "with (import (builtins.getFlake ''nixpkgs'') {}); terraform.withPlugins (plugins: [ plugins.openstack ])"
#! nix shell --impure --expr ``
#! nix with (import (builtins.getFlake ''nixpkgs'') {});
#! nix terraform.withPlugins (plugins: [ plugins.openstack ])
#! nix ``
#! nix --command bash
terraform "$@"
@ -118,7 +120,7 @@ terraform "$@"
> **Note**
>
> You must use double quotes (`"`) when passing a simple Nix expression
> You must use double backticks (```` `` ````) when passing a simple Nix expression
> in a nix shell shebang.
Finally, using the merging of multiple nix shell shebangs the following

View File

@ -80,6 +80,7 @@ chmod +x "$nonFlakeDir/shebang.sh"
git -C "$nonFlakeDir" add README.md shebang.sh
git -C "$nonFlakeDir" commit -m 'Initial'
# this also tests a fairly trivial double backtick quoted string, ``--command``
cat > $nonFlakeDir/shebang-comments.sh <<EOF
#! $(type -P env) nix
# some comments
@ -87,11 +88,23 @@ cat > $nonFlakeDir/shebang-comments.sh <<EOF
# some comments
#! nix --offline shell
#! nix flake1#fooScript
#! nix --no-write-lock-file --command bash
#! nix --no-write-lock-file ``--command`` bash
foo
EOF
chmod +x $nonFlakeDir/shebang-comments.sh
cat > $nonFlakeDir/shebang-reject.sh <<EOF
#! $(type -P env) nix
# some comments
# some comments
# some comments
#! nix --offline shell *
#! nix flake1#fooScript
#! nix --no-write-lock-file --command bash
foo
EOF
chmod +x $nonFlakeDir/shebang-reject.sh
# Construct a custom registry, additionally test the --registry flag
nix registry add --registry "$registry" flake1 "git+file://$flake1Dir"
nix registry add --registry "$registry" flake2 "git+file://$percentEncodedFlake2Dir"
@ -539,3 +552,4 @@ expectStderr 1 nix flake metadata "$flake2Dir" --no-allow-dirty --reference-lock
[[ $($nonFlakeDir/shebang.sh) = "foo" ]]
[[ $($nonFlakeDir/shebang.sh "bar") = "foo"$'\n'"bar" ]]
[[ $($nonFlakeDir/shebang-comments.sh ) = "foo" ]]
expect 1 $nonFlakeDir/shebang-reject.sh 2>&1 | grepQuiet -F 'error: unsupported unquoted character in nix shebang: *. Use double backticks to escape?'