From 429a197d246782e286fe7d9f6fad0b9daf2941ec Mon Sep 17 00:00:00 2001 From: Ryan Hendrickson Date: Sat, 20 Jul 2024 12:04:25 -0400 Subject: [PATCH 1/4] parser.y: use names where I'll be refactoring --- src/libexpr/parser.y | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 9ad41c148..256244a75 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -180,22 +180,22 @@ expr_function $$ = me; SET_DOC_POS(me, @1); } - | '{' formals '}' ':' expr_function - { auto me = new ExprLambda(CUR_POS, state->validateFormals($2), $5); + | '{' formals '}' ':' expr_function[body] + { auto me = new ExprLambda(CUR_POS, state->validateFormals($formals), $body); $$ = me; SET_DOC_POS(me, @1); } - | '{' formals '}' '@' ID ':' expr_function + | '{' formals '}' '@' ID ':' expr_function[body] { - auto arg = state->symbols.create($5); - auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($2, CUR_POS, arg), $7); + auto arg = state->symbols.create($ID); + auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formals, CUR_POS, arg), $body); $$ = me; SET_DOC_POS(me, @1); } - | ID '@' '{' formals '}' ':' expr_function + | ID '@' '{' formals '}' ':' expr_function[body] { - auto arg = state->symbols.create($1); - auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($4, CUR_POS, arg), $7); + auto arg = state->symbols.create($ID); + auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formals, CUR_POS, arg), $body); $$ = me; SET_DOC_POS(me, @1); } @@ -364,50 +364,50 @@ ind_string_parts ; binds - : binds attrpath '=' expr ';' { - $$ = $1; + : binds[accum] attrpath '=' expr ';' { + $$ = $accum; - auto pos = state->at(@2); - auto exprPos = state->at(@4); + auto pos = state->at(@attrpath); + auto exprPos = state->at(@expr); { auto it = state->lexerState.positionToDocComment.find(pos); if (it != state->lexerState.positionToDocComment.end()) { - $4->setDocComment(it->second); + $expr->setDocComment(it->second); state->lexerState.positionToDocComment.emplace(exprPos, it->second); } } - state->addAttr($$, std::move(*$2), $4, pos); - delete $2; + state->addAttr($$, std::move(*$attrpath), $expr, pos); + delete $attrpath; } - | binds INHERIT attrs ';' - { $$ = $1; - for (auto & [i, iPos] : *$3) { - if ($$->attrs.find(i.symbol) != $$->attrs.end()) - state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos); - $$->attrs.emplace( + | binds[accum] INHERIT attrs ';' + { $$ = $accum; + for (auto & [i, iPos] : *$attrs) { + if ($accum->attrs.find(i.symbol) != $accum->attrs.end()) + state->dupAttr(i.symbol, iPos, $accum->attrs[i.symbol].pos); + $accum->attrs.emplace( i.symbol, ExprAttrs::AttrDef(new ExprVar(iPos, i.symbol), iPos, ExprAttrs::AttrDef::Kind::Inherited)); } - delete $3; + delete $attrs; } - | binds INHERIT '(' expr ')' attrs ';' - { $$ = $1; - if (!$$->inheritFromExprs) - $$->inheritFromExprs = std::make_unique>(); - $$->inheritFromExprs->push_back($4); - auto from = new nix::ExprInheritFrom(state->at(@4), $$->inheritFromExprs->size() - 1); - for (auto & [i, iPos] : *$6) { - if ($$->attrs.find(i.symbol) != $$->attrs.end()) - state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos); - $$->attrs.emplace( + | binds[accum] INHERIT '(' expr ')' attrs ';' + { $$ = $accum; + if (!$accum->inheritFromExprs) + $accum->inheritFromExprs = std::make_unique>(); + $accum->inheritFromExprs->push_back($expr); + auto from = new nix::ExprInheritFrom(state->at(@expr), $accum->inheritFromExprs->size() - 1); + for (auto & [i, iPos] : *$attrs) { + if ($accum->attrs.find(i.symbol) != $accum->attrs.end()) + state->dupAttr(i.symbol, iPos, $accum->attrs[i.symbol].pos); + $accum->attrs.emplace( i.symbol, ExprAttrs::AttrDef( new ExprSelect(iPos, from, i.symbol), iPos, ExprAttrs::AttrDef::Kind::InheritedFrom)); } - delete $6; + delete $attrs; } | { $$ = new ExprAttrs(state->at(@0)); } ; @@ -468,10 +468,10 @@ expr_list ; formals - : formal ',' formals - { $$ = $3; $$->formals.emplace_back(*$1); delete $1; } + : formal ',' formals[accum] + { $$ = $accum; $$->formals.emplace_back(*$formal); delete $formal; } | formal - { $$ = new Formals; $$->formals.emplace_back(*$1); $$->ellipsis = false; delete $1; } + { $$ = new Formals; $$->formals.emplace_back(*$formal); $$->ellipsis = false; delete $formal; } | { $$ = new Formals; $$->ellipsis = false; } | ELLIPSIS From b0a8430e851790ebac4e2324084bd6d86b995316 Mon Sep 17 00:00:00 2001 From: Ryan Hendrickson Date: Sat, 20 Jul 2024 12:04:25 -0400 Subject: [PATCH 2/4] parser.y: move attr doc setting into addAttr --- src/libexpr/parser-state.hh | 11 +++++++++-- src/libexpr/parser.y | 13 +------------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser-state.hh index 4bb5c9204..c23ef32a5 100644 --- a/src/libexpr/parser-state.hh +++ b/src/libexpr/parser-state.hh @@ -86,7 +86,7 @@ struct ParserState void dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos); void dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos); - void addAttr(ExprAttrs * attrs, AttrPath && attrPath, Expr * e, const PosIdx pos); + void addAttr(ExprAttrs * attrs, AttrPath && attrPath, const ParserLocation & loc, Expr * e, const ParserLocation & exprLoc); Formals * validateFormals(Formals * formals, PosIdx pos = noPos, Symbol arg = {}); Expr * stripIndentation(const PosIdx pos, std::vector>> && es); @@ -110,11 +110,12 @@ inline void ParserState::dupAttr(Symbol attr, const PosIdx pos, const PosIdx pre }); } -inline void ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, Expr * e, const PosIdx pos) +inline void ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, const ParserLocation & loc, Expr * e, const ParserLocation & exprLoc) { AttrPath::iterator i; // All attrpaths have at least one attr assert(!attrPath.empty()); + auto pos = at(loc); // Checking attrPath validity. // =========================== for (i = attrPath.begin(); i + 1 < attrPath.end(); i++) { @@ -179,6 +180,12 @@ inline void ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, Expr * } else { attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(i->expr, e, pos)); } + + auto it = lexerState.positionToDocComment.find(pos); + if (it != lexerState.positionToDocComment.end()) { + e->setDocComment(it->second); + lexerState.positionToDocComment.emplace(at(exprLoc), it->second); + } } inline Formals * ParserState::validateFormals(Formals * formals, PosIdx pos, Symbol arg) diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 256244a75..6386747f5 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -366,18 +366,7 @@ ind_string_parts binds : binds[accum] attrpath '=' expr ';' { $$ = $accum; - - auto pos = state->at(@attrpath); - auto exprPos = state->at(@expr); - { - auto it = state->lexerState.positionToDocComment.find(pos); - if (it != state->lexerState.positionToDocComment.end()) { - $expr->setDocComment(it->second); - state->lexerState.positionToDocComment.emplace(exprPos, it->second); - } - } - - state->addAttr($$, std::move(*$attrpath), $expr, pos); + state->addAttr($$, std::move(*$attrpath), @attrpath, $expr, @expr); delete $attrpath; } | binds[accum] INHERIT attrs ';' From 6e3b9e6a4de7430c8b130a7c87d7f5df68cf6c86 Mon Sep 17 00:00:00 2001 From: Ryan Hendrickson Date: Sat, 20 Jul 2024 12:04:25 -0400 Subject: [PATCH 3/4] parser.y: eliminate conflicts --- src/libexpr/parser.y | 61 ++++++++++++------- .../lang/parse-fail-undef-var-2.err.exp | 2 +- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 6386747f5..2069931e1 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -8,8 +8,8 @@ %parse-param { nix::ParserState * state } %lex-param { void * scanner } %lex-param { nix::ParserState * state } -%expect 1 -%expect-rr 1 +%expect 0 +%expect-rr 0 %code requires { @@ -133,8 +133,8 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) { %type expr_select expr_simple expr_app %type expr_pipe_from expr_pipe_into %type expr_list -%type binds -%type formals +%type binds binds1 +%type formals formal_set %type formal %type attrpath %type attrs @@ -180,22 +180,22 @@ expr_function $$ = me; SET_DOC_POS(me, @1); } - | '{' formals '}' ':' expr_function[body] - { auto me = new ExprLambda(CUR_POS, state->validateFormals($formals), $body); + | formal_set ':' expr_function[body] + { auto me = new ExprLambda(CUR_POS, state->validateFormals($formal_set), $body); $$ = me; SET_DOC_POS(me, @1); } - | '{' formals '}' '@' ID ':' expr_function[body] + | formal_set '@' ID ':' expr_function[body] { auto arg = state->symbols.create($ID); - auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formals, CUR_POS, arg), $body); + auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formal_set, CUR_POS, arg), $body); $$ = me; SET_DOC_POS(me, @1); } - | ID '@' '{' formals '}' ':' expr_function[body] + | ID '@' formal_set ':' expr_function[body] { auto arg = state->symbols.create($ID); - auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formals, CUR_POS, arg), $body); + auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($formal_set, CUR_POS, arg), $body); $$ = me; SET_DOC_POS(me, @1); } @@ -311,11 +311,13 @@ expr_simple /* Let expressions `let {..., body = ...}' are just desugared into `(rec {..., body = ...}).body'. */ | LET '{' binds '}' - { $3->recursive = true; $$ = new ExprSelect(noPos, $3, state->s.body); } + { $3->recursive = true; $3->pos = CUR_POS; $$ = new ExprSelect(noPos, $3, state->s.body); } | REC '{' binds '}' - { $3->recursive = true; $$ = $3; } - | '{' binds '}' - { $$ = $2; } + { $3->recursive = true; $3->pos = CUR_POS; $$ = $3; } + | '{' binds1 '}' + { $2->pos = CUR_POS; $$ = $2; } + | '{' '}' + { $$ = new ExprAttrs(CUR_POS); } | '[' expr_list ']' { $$ = $2; } ; @@ -364,8 +366,13 @@ ind_string_parts ; binds - : binds[accum] attrpath '=' expr ';' { - $$ = $accum; + : binds1 + | { $$ = new ExprAttrs; } + ; + +binds1 + : binds1[accum] attrpath '=' expr ';' + { $$ = $accum; state->addAttr($$, std::move(*$attrpath), @attrpath, $expr, @expr); delete $attrpath; } @@ -398,7 +405,11 @@ binds } delete $attrs; } - | { $$ = new ExprAttrs(state->at(@0)); } + | attrpath '=' expr ';' + { $$ = new ExprAttrs; + state->addAttr($$, std::move(*$attrpath), @attrpath, $expr, @expr); + delete $attrpath; + } ; attrs @@ -456,15 +467,19 @@ expr_list | { $$ = new ExprList; } ; +formal_set + : '{' formals ',' ELLIPSIS '}' { $$ = $formals; $$->ellipsis = true; } + | '{' ELLIPSIS '}' { $$ = new Formals; $$->ellipsis = true; } + | '{' formals ',' '}' { $$ = $formals; $$->ellipsis = false; } + | '{' formals '}' { $$ = $formals; $$->ellipsis = false; } + | '{' '}' { $$ = new Formals; $$->ellipsis = false; } + ; + formals - : formal ',' formals[accum] + : formals[accum] ',' formal { $$ = $accum; $$->formals.emplace_back(*$formal); delete $formal; } | formal - { $$ = new Formals; $$->formals.emplace_back(*$formal); $$->ellipsis = false; delete $formal; } - | - { $$ = new Formals; $$->ellipsis = false; } - | ELLIPSIS - { $$ = new Formals; $$->ellipsis = true; } + { $$ = new Formals; $$->formals.emplace_back(*$formal); delete $formal; } ; formal diff --git a/tests/functional/lang/parse-fail-undef-var-2.err.exp b/tests/functional/lang/parse-fail-undef-var-2.err.exp index 393c454dd..96e87b2aa 100644 --- a/tests/functional/lang/parse-fail-undef-var-2.err.exp +++ b/tests/functional/lang/parse-fail-undef-var-2.err.exp @@ -1,4 +1,4 @@ -error: syntax error, unexpected ':', expecting '}' +error: syntax error, unexpected ':', expecting '}' or ',' at «stdin»:3:13: 2| 3| f = {x, y : ["baz" "bar" z "bat"]}: x + y; From 18db46a6cb72acaae748833b09b428a7794bd9c8 Mon Sep 17 00:00:00 2001 From: Ryan Hendrickson Date: Mon, 22 Jul 2024 11:36:09 -0400 Subject: [PATCH 4/4] parser.y: GLR -> LALR --- src/libexpr/parser-state.hh | 1 + src/libexpr/parser.y | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser-state.hh index c23ef32a5..8ad0d9ad7 100644 --- a/src/libexpr/parser-state.hh +++ b/src/libexpr/parser-state.hh @@ -20,6 +20,7 @@ struct StringToken operator std::string_view() const { return {p, l}; } }; +// This type must be trivially copyable; see YYLTYPE_IS_TRIVIAL in parser.y. struct ParserLocation { int beginOffset; diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 2069931e1..f2ccca7fc 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -1,4 +1,4 @@ -%glr-parser +%define api.location.type { ::nix::ParserLocation } %define api.pure %locations %define parse.error verbose @@ -9,7 +9,6 @@ %lex-param { void * scanner } %lex-param { nix::ParserState * state } %expect 0 -%expect-rr 0 %code requires { @@ -27,7 +26,17 @@ #include "eval-settings.hh" #include "parser-state.hh" -#define YYLTYPE ::nix::ParserLocation +// Bison seems to have difficulty growing the parser stack when using C++ with +// a custom location type. This undocumented macro tells Bison that our +// location type is "trivially copyable" in C++-ese, so it is safe to use the +// same memcpy macro it uses to grow the stack that it uses with its own +// default location type. Without this, we get "error: memory exhausted" when +// parsing some large Nix files. Our other options are to increase the initial +// stack size (200 by default) to be as large as we ever want to support (so +// that growing the stack is unnecessary), or redefine the stack-relocation +// macro ourselves (which is also undocumented). +#define YYLTYPE_IS_TRIVIAL 1 + #define YY_DECL int yylex \ (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParserState * state) @@ -77,7 +86,7 @@ YY_DECL; using namespace nix; -#define CUR_POS state->at(*yylocp) +#define CUR_POS state->at(yyloc) void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char * error)