Merge branch 'parser-improvements' of https://github.com/pennae/nix

This commit is contained in:
Eelco Dolstra 2022-02-02 12:45:44 +01:00
commit 17e3f353df
8 changed files with 138 additions and 90 deletions

View file

@ -1374,7 +1374,7 @@ void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value &
/* Nope, so show the first unexpected argument to the
user. */
for (auto & i : *args[0]->attrs)
if (!lambda.formals->argNames.count(i.name))
if (!lambda.formals->has(i.name))
throwTypeError(pos, "%1% called with unexpected argument '%2%'", lambda, i.name);
abort(); // can't happen
}

View file

@ -66,7 +66,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
// we make use of the fact that the parser receives a private copy of the input
// string and can munge around in it.
static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
{
char * result = s;
char * t = s;
@ -89,7 +89,7 @@ static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
else *t = c;
t++;
}
return new ExprString(symbols.create({result, size_t(t - result)}));
return {result, size_t(t - result)};
}
@ -176,7 +176,7 @@ or { return OR_KW; }
/* It is impossible to match strings ending with '$' with one
regex because trailing contexts are only valid at the end
of a rule. (A sane but undocumented limitation.) */
yylval->e = unescapeStr(data->symbols, yytext, yyleng);
yylval->str = unescapeStr(data->symbols, yytext, yyleng);
return STR;
}
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
@ -191,26 +191,26 @@ or { return OR_KW; }
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
yylval->e = new ExprIndStr(yytext);
yylval->str = {yytext, (size_t) yyleng, true};
return IND_STR;
}
<IND_STRING>\'\'\$ |
<IND_STRING>\$ {
yylval->e = new ExprIndStr("$");
yylval->str = {"$", 1};
return IND_STR;
}
<IND_STRING>\'\'\' {
yylval->e = new ExprIndStr("''");
yylval->str = {"''", 2};
return IND_STR;
}
<IND_STRING>\'\'\\{ANY} {
yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
return IND_STR;
}
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
<IND_STRING>\' {
yylval->e = new ExprIndStr("'");
yylval->str = {"'", 1};
return IND_STR;
}
@ -264,7 +264,7 @@ or { return OR_KW; }
PUSH_STATE(INPATH_SLASH);
else
PUSH_STATE(INPATH);
yylval->e = new ExprString(data->symbols.create(string(yytext)));
yylval->str = {yytext, (size_t) yyleng};
return STR;
}
<INPATH>{ANY} |

View file

@ -110,20 +110,13 @@ struct ExprFloat : Expr
struct ExprString : Expr
{
Symbol s;
string s;
Value v;
ExprString(const Symbol & s) : s(s) { v.mkString(s); };
ExprString(std::string s) : s(std::move(s)) { v.mkString(this->s.data()); };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
/* Temporary class used during parsing of indented strings. */
struct ExprIndStr : Expr
{
string s;
ExprIndStr(const string & s) : s(s) { };
};
struct ExprPath : Expr
{
string s;
@ -223,10 +216,25 @@ struct Formal
struct Formals
{
typedef std::list<Formal> Formals_;
typedef std::vector<Formal> Formals_;
Formals_ formals;
std::set<Symbol> argNames; // used during parsing
bool ellipsis;
bool has(Symbol arg) const {
auto it = std::lower_bound(formals.begin(), formals.end(), arg,
[] (const Formal & f, const Symbol & sym) { return f.name < sym; });
return it != formals.end() && it->name == arg;
}
std::vector<Formal> lexicographicOrder() const
{
std::vector<Formal> result(formals.begin(), formals.end());
std::sort(result.begin(), result.end(),
[] (const Formal & a, const Formal & b) {
return std::string_view(a.name) < std::string_view(b.name);
});
return result;
}
};
struct ExprLambda : Expr
@ -239,11 +247,6 @@ struct ExprLambda : Expr
ExprLambda(const Pos & pos, const Symbol & arg, Formals * formals, Expr * body)
: pos(pos), arg(arg), formals(formals), body(body)
{
if (!arg.empty() && formals && formals->argNames.find(arg) != formals->argNames.end())
throw ParseError({
.msg = hintfmt("duplicate formal function argument '%1%'", arg),
.errPos = pos
});
};
void setName(Symbol & name);
string showNamePos() const;

View file

@ -16,6 +16,8 @@
#ifndef BISON_HEADER
#define BISON_HEADER
#include <variant>
#include "util.hh"
#include "nixexpr.hh"
@ -39,8 +41,22 @@ namespace nix {
{ };
};
struct ParserFormals {
std::vector<Formal> formals;
bool ellipsis = false;
};
}
// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
bool hasIndentation;
operator std::string_view() const { return {p, l}; }
};
#define YY_DECL int yylex \
(YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)
@ -140,21 +156,46 @@ static void addAttr(ExprAttrs * attrs, AttrPath & attrPath,
}
static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
static Formals * toFormals(ParseData & data, ParserFormals * formals,
Pos pos = noPos, Symbol arg = {})
{
if (!formals->argNames.insert(formal.name).second)
std::sort(formals->formals.begin(), formals->formals.end(),
[] (const auto & a, const auto & b) {
return std::tie(a.name, a.pos) < std::tie(b.name, b.pos);
});
std::optional<std::pair<Symbol, Pos>> duplicate;
for (size_t i = 0; i + 1 < formals->formals.size(); i++) {
if (formals->formals[i].name != formals->formals[i + 1].name)
continue;
std::pair thisDup{formals->formals[i].name, formals->formals[i + 1].pos};
duplicate = std::min(thisDup, duplicate.value_or(thisDup));
}
if (duplicate)
throw ParseError({
.msg = hintfmt("duplicate formal function argument '%1%'",
formal.name),
.msg = hintfmt("duplicate formal function argument '%1%'", duplicate->first),
.errPos = duplicate->second
});
Formals result;
result.ellipsis = formals->ellipsis;
result.formals = std::move(formals->formals);
if (arg.set() && result.has(arg))
throw ParseError({
.msg = hintfmt("duplicate formal function argument '%1%'", arg),
.errPos = pos
});
formals->formals.push_front(formal);
delete formals;
return new Formals(std::move(result));
}
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<std::pair<Pos, Expr *> > & es)
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
{
if (es.empty()) return new ExprString(symbols.create(""));
if (es.empty()) return new ExprString("");
/* Figure out the minimum indentation. Note that by design
whitespace-only final lines are not taken into account. (So
@ -163,20 +204,20 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
size_t minIndent = 1000000;
size_t curIndent = 0;
for (auto & [i_pos, i] : es) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i);
if (!e) {
/* Anti-quotations end the current start-of-line whitespace. */
auto * str = std::get_if<StringToken>(&i);
if (!str || !str->hasIndentation) {
/* Anti-quotations and escaped characters end the current start-of-line whitespace. */
if (atStartOfLine) {
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
continue;
}
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < str->l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ')
if (str->p[j] == ' ')
curIndent++;
else if (e->s[j] == '\n') {
else if (str->p[j] == '\n') {
/* Empty line, doesn't influence minimum
indentation. */
curIndent = 0;
@ -184,7 +225,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
} else if (e->s[j] == '\n') {
} else if (str->p[j] == '\n') {
atStartOfLine = true;
curIndent = 0;
}
@ -196,33 +237,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
atStartOfLine = true;
size_t curDropped = 0;
size_t n = es.size();
for (vector<std::pair<Pos, Expr *> >::iterator i = es.begin(); i != es.end(); ++i, --n) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i->second);
if (!e) {
atStartOfLine = false;
curDropped = 0;
es2->push_back(*i);
continue;
}
auto i = es.begin();
const auto trimExpr = [&] (Expr * e) {
atStartOfLine = false;
curDropped = 0;
es2->emplace_back(i->first, e);
};
const auto trimString = [&] (const StringToken & t) {
string s2;
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < t.l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ') {
if (t.p[j] == ' ') {
if (curDropped++ >= minIndent)
s2 += e->s[j];
s2 += t.p[j];
}
else if (e->s[j] == '\n') {
else if (t.p[j] == '\n') {
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
} else {
atStartOfLine = false;
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
}
} else {
s2 += e->s[j];
if (e->s[j] == '\n') atStartOfLine = true;
s2 += t.p[j];
if (t.p[j] == '\n') atStartOfLine = true;
}
}
@ -234,7 +273,10 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
s2 = string(s2, 0, p + 1);
}
es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
es2->emplace_back(i->first, new ExprString(s2));
};
for (; i != es.end(); ++i, --n) {
std::visit(overloaded { trimExpr, trimString }, i->second);
}
/* If this is a single string, then don't do a concatenation. */
@ -269,22 +311,17 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
nix::Expr * e;
nix::ExprList * list;
nix::ExprAttrs * attrs;
nix::Formals * formals;
nix::ParserFormals * formals;
nix::Formal * formal;
nix::NixInt n;
nix::NixFloat nf;
// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
operator std::string_view() const { return {p, l}; }
};
StringToken id; // !!! -> Symbol
StringToken path;
StringToken uri;
StringToken str;
std::vector<nix::AttrName> * attrNames;
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
std::vector<std::pair<nix::Pos, std::variant<nix::Expr *, StringToken> > > * ind_string_parts;
}
%type <e> start expr expr_function expr_if expr_op
@ -294,11 +331,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
%type <formals> formals
%type <formal> formal
%type <attrNames> attrs attrpath
%type <string_parts> string_parts_interpolated ind_string_parts
%type <string_parts> string_parts_interpolated
%type <ind_string_parts> ind_string_parts
%type <e> path_start string_parts string_attr
%type <id> attr
%token <id> ID ATTRPATH
%token <e> STR IND_STR
%token <str> STR IND_STR
%token <n> INT
%token <nf> FLOAT
%token <path> PATH HPATH SPATH PATH_END
@ -331,11 +369,17 @@ expr_function
: ID ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($1), 0, $3); }
| '{' formals '}' ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create(""), $2, $5); }
{ $$ = new ExprLambda(CUR_POS, data->symbols.create(""), toFormals(*data, $2), $5); }
| '{' formals '}' '@' ID ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($5), $2, $7); }
{
Symbol arg = data->symbols.create($5);
$$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $2, CUR_POS, arg), $7);
}
| ID '@' '{' formals '}' ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($1), $4, $7); }
{
Symbol arg = data->symbols.create($1);
$$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $4, CUR_POS, arg), $7);
}
| ASSERT expr ';' expr_function
{ $$ = new ExprAssert(CUR_POS, $2, $4); }
| WITH expr ';' expr_function
@ -426,7 +470,7 @@ expr_simple
$$ = new ExprCall(CUR_POS,
new ExprVar(data->symbols.create("__findFile")),
{new ExprVar(data->symbols.create("__nixPath")),
new ExprString(data->symbols.create(path))});
new ExprString(path)});
}
| URI {
static bool noURLLiterals = settings.isExperimentalFeatureEnabled(Xp::NoUrlLiterals);
@ -435,7 +479,7 @@ expr_simple
.msg = hintfmt("URL literals are disabled"),
.errPos = CUR_POS
});
$$ = new ExprString(data->symbols.create($1));
$$ = new ExprString(string($1));
}
| '(' expr ')' { $$ = $2; }
/* Let expressions `let {..., body = ...}' are just desugared
@ -450,18 +494,19 @@ expr_simple
;
string_parts
: STR
: STR { $$ = new ExprString(string($1)); }
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
| { $$ = new ExprString(data->symbols.create("")); }
| { $$ = new ExprString(""); }
;
string_parts_interpolated
: string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
: string_parts_interpolated STR
{ $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(string($2))); }
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
| STR DOLLAR_CURLY expr '}' {
$$ = new vector<std::pair<Pos, Expr *> >;
$$->emplace_back(makeCurPos(@1, data), $1);
$$->emplace_back(makeCurPos(@1, data), new ExprString(string($1)));
$$->emplace_back(makeCurPos(@2, data), $3);
}
;
@ -483,7 +528,7 @@ path_start
ind_string_parts
: ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
| ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| { $$ = new vector<std::pair<Pos, Expr *> >; }
| { $$ = new vector<std::pair<Pos, std::variant<Expr *, StringToken> > >; }
;
binds
@ -515,7 +560,7 @@ attrs
{ $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($2);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
throw ParseError({
@ -532,7 +577,7 @@ attrpath
{ $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($3);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
$$->push_back(AttrName($3));
@ -542,7 +587,7 @@ attrpath
{ $$ = new vector<AttrName>;
ExprString *str = dynamic_cast<ExprString *>($1);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
$$->push_back(AttrName($1));
@ -566,13 +611,13 @@ expr_list
formals
: formal ',' formals
{ $$ = $3; addFormal(CUR_POS, $$, *$1); }
{ $$ = $3; $$->formals.push_back(*$1); }
| formal
{ $$ = new Formals; addFormal(CUR_POS, $$, *$1); $$->ellipsis = false; }
{ $$ = new ParserFormals; $$->formals.push_back(*$1); $$->ellipsis = false; }
|
{ $$ = new Formals; $$->ellipsis = false; }
{ $$ = new ParserFormals; $$->ellipsis = false; }
| ELLIPSIS
{ $$ = new Formals; $$->ellipsis = true; }
{ $$ = new ParserFormals; $$->ellipsis = true; }
;
formal

View file

@ -142,7 +142,7 @@ static void printValueAsXML(EvalState & state, bool strict, bool location,
if (!v.lambda.fun->arg.empty()) attrs["name"] = v.lambda.fun->arg;
if (v.lambda.fun->formals->ellipsis) attrs["ellipsis"] = "1";
XMLOpenElement _(doc, "attrspat", attrs);
for (auto & i : v.lambda.fun->formals->formals)
for (auto & i : v.lambda.fun->formals->lexicographicOrder())
doc.writeEmptyElement("attr", singletonAttrs("name", i.name));
} else
doc.writeEmptyElement("varpat", singletonAttrs("name", v.lambda.fun->arg));

View file

@ -476,8 +476,8 @@ struct CmdFlakeCheck : FlakeCommand
if (!v.isLambda())
throw Error("bundler must be a function");
if (!v.lambda.fun->formals ||
!v.lambda.fun->formals->argNames.count(state->symbols.create("program")) ||
!v.lambda.fun->formals->argNames.count(state->symbols.create("system")))
!v.lambda.fun->formals->has(state->symbols.create("program")) ||
!v.lambda.fun->formals->has(state->symbols.create("system")))
throw Error("bundler must take formal arguments 'program' and 'system'");
} catch (Error & e) {
e.addTrace(pos, hintfmt("while checking the template '%s'", attrPath));

View file

@ -50,6 +50,6 @@ with import ./config.nix;
fetchurl = import <nix/fetchurl.nix> {
url = "file://" + toString ./lang/eval-okay-xml.exp.xml;
sha256 = "0kg4sla7ihm8ijr8cb3117fhl99zrc2bwy1jrngsfmkh8bav4m0v";
sha256 = "sha256-behBlX+DQK/Pjvkuc8Tx68Jwi4E5v86wDq+ZLaHyhQE=";
};
}

View file

@ -31,9 +31,9 @@
<attr name="f">
<function>
<attrspat>
<attr name="z" />
<attr name="x" />
<attr name="y" />
<attr name="z" />
</attrspat>
</function>
</attr>