2004-01-30 17:21:42 +02:00
|
|
|
%option reentrant bison-bridge bison-locations
|
2023-12-10 13:59:51 +02:00
|
|
|
%option align
|
2004-01-30 17:21:42 +02:00
|
|
|
%option noyywrap
|
|
|
|
%option never-interactive
|
2015-07-02 19:39:02 +03:00
|
|
|
%option stack
|
|
|
|
%option nodefault
|
|
|
|
%option nounput noyy_top_state
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
%s DEFAULT
|
2006-05-01 17:01:47 +03:00
|
|
|
%x STRING
|
2007-11-30 18:48:45 +02:00
|
|
|
%x IND_STRING
|
2021-07-29 19:03:07 +03:00
|
|
|
%x INPATH
|
|
|
|
%x INPATH_SLASH
|
|
|
|
%x PATH_START
|
2006-05-01 17:01:47 +03:00
|
|
|
|
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
%{
|
2020-12-02 15:33:20 +02:00
|
|
|
#ifdef __clang__
|
2020-12-01 15:57:56 +02:00
|
|
|
#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
|
2020-12-02 15:33:20 +02:00
|
|
|
#endif
|
2020-12-01 15:57:56 +02:00
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
#include "nixexpr.hh"
|
2006-09-05 00:06:23 +03:00
|
|
|
#include "parser-tab.hh"
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
using namespace nix;
|
|
|
|
|
|
|
|
namespace nix {
|
|
|
|
|
2024-01-15 17:52:18 +02:00
|
|
|
/* Shorthand for state->at(*yylloc). The rule actions below use it as the
   index into state->positions when filling in the `.pos` of a ParseError. */
#define CUR_POS state->at(*yylloc)
|
2022-03-24 10:10:33 +02:00
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
/* Reset a scanner location so that all four line/column fields are zero.
   Installed as the YY_USER_INIT hook further down in this prologue. */
static void initLoc(YYLTYPE * loc)
{
    loc->last_line = 0;
    loc->first_line = 0;

    loc->last_column = 0;
    loc->first_column = 0;
}
|
|
|
|
|
|
|
|
/* Move the location forward over the text that was just matched.

   loc: location to update in place.
   s:   matched text; not inspected here, only the length matters.
   len: number of characters matched.

   The new span starts where the previous one ended and is `len` columns
   wide. Line numbers are not touched by this function. */
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
{
    YYLTYPE & where = *loc;

    // Must happen before the columns change. NOTE(review): stash() presumably
    // records the current location so that the rules calling yyless(0)
    // followed by yylloc->unstash() can roll it back -- confirm against the
    // YYLTYPE definition.
    where.stash();

    where.first_column = where.last_column;
    where.last_column += len;
}
|
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
|
2021-12-21 11:28:05 +02:00
|
|
|
// we make use of the fact that the parser receives a private copy of the input
|
|
|
|
// string and can munge around in it.
|
2022-01-19 14:39:42 +02:00
|
|
|
static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
|
2006-09-05 00:06:23 +03:00
|
|
|
{
|
2021-12-21 11:28:05 +02:00
|
|
|
char * result = s;
|
|
|
|
char * t = s;
|
2006-09-05 00:36:15 +03:00
|
|
|
char c;
|
2021-12-21 11:28:05 +02:00
|
|
|
// the input string is terminated with *two* NULs, so we can safely take
|
|
|
|
// *one* character after the one being checked against.
|
2006-09-05 00:36:15 +03:00
|
|
|
while ((c = *s++)) {
|
|
|
|
if (c == '\\') {
|
|
|
|
c = *s++;
|
2021-12-21 11:28:05 +02:00
|
|
|
if (c == 'n') *t = '\n';
|
|
|
|
else if (c == 'r') *t = '\r';
|
|
|
|
else if (c == 't') *t = '\t';
|
|
|
|
else *t = c;
|
2006-09-05 00:36:15 +03:00
|
|
|
}
|
|
|
|
else if (c == '\r') {
|
|
|
|
/* Normalise CR and CR/LF into LF. */
|
2021-12-21 11:28:05 +02:00
|
|
|
*t = '\n';
|
2006-09-05 00:36:15 +03:00
|
|
|
if (*s == '\n') s++; /* cr/lf */
|
|
|
|
}
|
2021-12-21 11:28:05 +02:00
|
|
|
else *t = c;
|
|
|
|
t++;
|
2006-09-05 00:36:15 +03:00
|
|
|
}
|
2022-01-19 14:39:42 +02:00
|
|
|
return {result, size_t(t - result)};
|
2006-09-05 00:06:23 +03:00
|
|
|
}
|
|
|
|
|
2013-09-02 17:29:15 +03:00
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2024-02-25 01:52:16 +02:00
|
|
|
// yacc generates code that uses unannotated fallthrough.
|
|
|
|
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
|
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
/* Scanner start-up hook: zeroes the location via initLoc().
   NOTE(review): per the flex manual YY_USER_INIT runs once, before the
   first scan -- confirm. */
#define YY_USER_INIT initLoc(yylloc)
|
|
|
|
/* Runs adjustLoc() on the text just matched (yytext / yyleng) so that yylloc
   tracks the current token. The trailing ';' is part of the macro on
   purpose: it is expanded as a complete statement.
   NOTE(review): flex expands YY_USER_ACTION ahead of every rule action --
   see the flex manual. */
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
|
|
|
|
|
2015-07-02 19:39:02 +03:00
|
|
|
/* Enter start condition `state`, pushing it on this scanner's state stack
   (the stack is enabled by `%option stack`). Note that the macro parameter
   is named `state`; it is unrelated to the `state` object used by CUR_POS. */
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
|
|
|
|
/* Leave the current start condition by popping this scanner's state stack.
   The `\}` rule guards against popping while still in INITIAL. */
#define POP_STATE() yy_pop_state(yyscanner)
|
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
%}
|
|
|
|
|
|
|
|
|
2018-02-25 23:51:47 +02:00
|
|
|
ANY .|\n
|
2012-09-27 22:43:08 +03:00
|
|
|
ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
|
2004-01-30 17:21:42 +02:00
|
|
|
INT [0-9]+
|
2016-01-05 10:54:49 +02:00
|
|
|
FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
|
2021-07-29 19:03:07 +03:00
|
|
|
PATH_CHAR [a-zA-Z0-9\.\_\-\+]
|
|
|
|
PATH {PATH_CHAR}*(\/{PATH_CHAR}+)+\/?
|
|
|
|
PATH_SEG {PATH_CHAR}*\/
|
|
|
|
HPATH \~(\/{PATH_CHAR}+)+\/?
|
|
|
|
HPATH_START \~\/
|
|
|
|
SPATH \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\>
|
2017-11-14 16:10:52 +02:00
|
|
|
URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
|
|
|
|
if { return IF; }
|
|
|
|
then { return THEN; }
|
|
|
|
else { return ELSE; }
|
|
|
|
assert { return ASSERT; }
|
2004-10-25 19:54:56 +03:00
|
|
|
with { return WITH; }
|
2004-01-30 17:21:42 +02:00
|
|
|
let { return LET; }
|
2024-01-13 02:46:48 +02:00
|
|
|
in { return IN_KW; }
|
2004-01-30 17:21:42 +02:00
|
|
|
rec { return REC; }
|
2004-02-02 23:39:33 +02:00
|
|
|
inherit { return INHERIT; }
|
2011-07-13 15:19:57 +03:00
|
|
|
or { return OR_KW; }
|
2008-08-14 17:00:44 +03:00
|
|
|
\.\.\. { return ELLIPSIS; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
\=\= { return EQ; }
|
|
|
|
\!\= { return NEQ; }
|
2013-08-02 19:39:40 +03:00
|
|
|
\<\= { return LEQ; }
|
|
|
|
\>\= { return GEQ; }
|
2004-01-30 17:21:42 +02:00
|
|
|
\&\& { return AND; }
|
|
|
|
\|\| { return OR; }
|
|
|
|
\-\> { return IMPL; }
|
2004-02-04 18:49:51 +02:00
|
|
|
\/\/ { return UPDATE; }
|
2005-07-25 18:05:34 +03:00
|
|
|
\+\+ { return CONCAT; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; }
|
2013-08-19 13:35:03 +03:00
|
|
|
{INT} { errno = 0;
|
Remove 100s of CPU time (10%) from build times (1465s -> 1302s)
Results from Mic92's framework 13th Gen Intel Core i7-1360P:
Before: 3595.92s user 183.01s system 1360% cpu 4:37.74 total
After: 3486.07s user 168.93s system 1354% cpu 4:29.79 total
I saw that boost/lexical_cast was costing about 100s in CPU time on our
compiles. We can fix this trivially by doing explicit template
instantiation in exactly one place and eliminating all other includes of
it, which is a code improvement anyway by hiding the boost.
Before:
```
lix/lix2 » ClangBuildAnalyzer --analyze buildtimeold.bin
Analyzing build trace from 'buildtimeold.bin'...
**** Time summary:
Compilation (551 times):
Parsing (frontend): 1465.3 s
Codegen & opts (backend): 1110.9 s
<snip>
**** Expensive headers:
178153 ms: ../src/libcmd/installable-value.hh (included 52 times, avg 3426 ms), included via:
40x: command.hh
5x: command-installable-value.hh
3x: installable-flake.hh
2x: <direct include>
2x: installable-attr-path.hh
176217 ms: ../src/libutil/error.hh (included 246 times, avg 716 ms), included via:
36x: command.hh installable-value.hh installables.hh derived-path.hh config.hh experimental-features.hh
12x: globals.hh config.hh experimental-features.hh
11x: file-system.hh file-descriptor.hh
6x: serialise.hh strings.hh
6x: <direct include>
6x: archive.hh serialise.hh strings.hh
...
173243 ms: ../src/libstore/store-api.hh (included 152 times, avg 1139 ms), included via:
55x: <direct include>
39x: command.hh installable-value.hh installables.hh
7x: libexpr.hh
4x: local-store.hh
4x: command-installable-value.hh installable-value.hh installables.hh
3x: binary-cache-store.hh
...
170482 ms: ../src/libutil/serialise.hh (included 201 times, avg 848 ms), included via:
37x: command.hh installable-value.hh installables.hh built-path.hh realisation.hh hash.hh
14x: store-api.hh nar-info.hh hash.hh
11x: <direct include>
7x: primops.hh eval.hh attr-set.hh nixexpr.hh value.hh source-path.hh archive.hh
7x: libexpr.hh value.hh source-path.hh archive.hh
6x: fetchers.hh hash.hh
...
169397 ms: ../src/libcmd/installables.hh (included 53 times, avg 3196 ms), included via:
40x: command.hh installable-value.hh
5x: command-installable-value.hh installable-value.hh
3x: installable-flake.hh installable-value.hh
2x: <direct include>
1x: installable-derived-path.hh
1x: installable-value.hh
...
159740 ms: ../src/libutil/strings.hh (included 221 times, avg 722 ms), included via:
37x: command.hh installable-value.hh installables.hh built-path.hh realisation.hh hash.hh serialise.hh
19x: <direct include>
14x: store-api.hh nar-info.hh hash.hh serialise.hh
11x: serialise.hh
7x: primops.hh eval.hh attr-set.hh nixexpr.hh value.hh source-path.hh archive.hh serialise.hh
7x: libexpr.hh value.hh source-path.hh archive.hh serialise.hh
...
156796 ms: ../src/libcmd/command.hh (included 51 times, avg 3074 ms), included via:
42x: <direct include>
7x: command-installable-value.hh
2x: installable-attr-path.hh
150392 ms: ../src/libutil/types.hh (included 251 times, avg 599 ms), included via:
36x: command.hh installable-value.hh installables.hh path.hh
11x: file-system.hh
10x: globals.hh
6x: fetchers.hh
6x: serialise.hh strings.hh error.hh
5x: archive.hh
...
133101 ms: /nix/store/644b90j1vms44nr18yw3520pzkrg4dd1-boost-1.81.0-dev/include/boost/lexical_cast.hpp (included 226 times, avg 588 ms), included via
:
37x: command.hh installable-value.hh installables.hh built-path.hh realisation.hh hash.hh serialise.hh strings.hh
19x: file-system.hh
11x: store-api.hh nar-info.hh hash.hh serialise.hh strings.hh
7x: primops.hh eval.hh attr-set.hh nixexpr.hh value.hh source-path.hh archive.hh serialise.hh strings.hh
7x: libexpr.hh value.hh source-path.hh archive.hh serialise.hh strings.hh
6x: eval.hh attr-set.hh nixexpr.hh value.hh source-path.hh archive.hh serialise.hh strings.hh
...
132887 ms: /nix/store/h2abv2l8irqj942i5rq9wbrj42kbsh5y-gcc-12.3.0/include/c++/12.3.0/memory (included 262 times, avg 507 ms), included via:
36x: command.hh installable-value.hh installables.hh path.hh types.hh ref.hh
16x: gtest.h
11x: file-system.hh types.hh ref.hh
10x: globals.hh types.hh ref.hh
10x: json.hpp
6x: serialise.hh
...
done in 0.6s.
```
After:
```
lix/lix2 » maintainers/buildtime_report.sh build
Processing all files and saving to '/home/jade/lix/lix2/maintainers/../buildtime.bin'...
done in 0.6s. Run 'ClangBuildAnalyzer --analyze /home/jade/lix/lix2/maintainers/../buildtime.bin' to analyze it.
Analyzing build trace from '/home/jade/lix/lix2/maintainers/../buildtime.bin'...
**** Time summary:
Compilation (551 times):
Parsing (frontend): 1302.1 s
Codegen & opts (backend): 956.3 s
<snip>
**** Expensive headers:
178145 ms: ../src/libutil/error.hh (included 246 times, avg 724 ms), included via:
36x: command.hh installable-value.hh installables.hh derived-path.hh config.hh experimental-features.hh
12x: globals.hh config.hh experimental-features.hh
11x: file-system.hh file-descriptor.hh
6x: <direct include>
6x: serialise.hh strings.hh
6x: fetchers.hh hash.hh serialise.hh strings.hh
...
154043 ms: ../src/libcmd/installable-value.hh (included 52 times, avg 2962 ms), included via:
40x: command.hh
5x: command-installable-value.hh
3x: installable-flake.hh
2x: <direct include>
2x: installable-attr-path.hh
153593 ms: ../src/libstore/store-api.hh (included 152 times, avg 1010 ms), included via:
55x: <direct include>
39x: command.hh installable-value.hh installables.hh
7x: libexpr.hh
4x: local-store.hh
4x: command-installable-value.hh installable-value.hh installables.hh
3x: binary-cache-store.hh
...
149948 ms: ../src/libutil/types.hh (included 251 times, avg 597 ms), included via:
36x: command.hh installable-value.hh installables.hh path.hh
11x: file-system.hh
10x: globals.hh
6x: fetchers.hh
6x: serialise.hh strings.hh error.hh
5x: archive.hh
...
144560 ms: ../src/libcmd/installables.hh (included 53 times, avg 2727 ms), included via:
40x: command.hh installable-value.hh
5x: command-installable-value.hh installable-value.hh
3x: installable-flake.hh installable-value.hh
2x: <direct include>
1x: installable-value.hh
1x: installable-derived-path.hh
...
136585 ms: ../src/libcmd/command.hh (included 51 times, avg 2678 ms), included via:
42x: <direct include>
7x: command-installable-value.hh
2x: installable-attr-path.hh
133394 ms: /nix/store/h2abv2l8irqj942i5rq9wbrj42kbsh5y-gcc-12.3.0/include/c++/12.3.0/memory (included 262 times, avg 509 ms), included via:
36x: command.hh installable-value.hh installables.hh path.hh types.hh ref.hh
16x: gtest.h
11x: file-system.hh types.hh ref.hh
10x: globals.hh types.hh ref.hh
10x: json.hpp
6x: serialise.hh
...
89315 ms: ../src/libstore/derived-path.hh (included 178 times, avg 501 ms), included via:
37x: command.hh installable-value.hh installables.hh
25x: store-api.hh realisation.hh
7x: primops.hh eval.hh attr-set.hh nixexpr.hh value.hh context.hh
6x: eval.hh attr-set.hh nixexpr.hh value.hh context.hh
6x: libexpr.hh value.hh context.hh
6x: shared.hh
...
87347 ms: /nix/store/h2abv2l8irqj942i5rq9wbrj42kbsh5y-gcc-12.3.0/include/c++/12.3.0/ostream (included 273 times, avg 319 ms), included via:
35x: command.hh installable-value.hh installables.hh path.hh types.hh ref.hh memory unique_ptr.h
12x: regex sstream istream
10x: file-system.hh types.hh ref.hh memory unique_ptr.h
10x: gtest.h memory unique_ptr.h
10x: globals.hh types.hh ref.hh memory unique_ptr.h
6x: fetchers.hh types.hh ref.hh memory unique_ptr.h
...
85249 ms: ../src/libutil/config.hh (included 213 times, avg 400 ms), included via:
37x: command.hh installable-value.hh installables.hh derived-path.hh
20x: globals.hh
20x: logging.hh
16x: store-api.hh logging.hh
6x: <direct include>
6x: eval.hh attr-set.hh nixexpr.hh value.hh context.hh derived-path.hh
...
done in 0.5s.
```
Adapted from https://git.lix.systems/lix-project/lix/commit/18aa3e1d570b4ecbb9962376e5fba5757dad8da9
2024-05-30 07:12:34 +03:00
|
|
|
std::optional<int64_t> numMay = string2Int<int64_t>(yytext);
|
|
|
|
if (numMay.has_value()) {
|
|
|
|
yylval->n = *numMay;
|
|
|
|
} else {
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
throw ParseError(ErrorInfo{
|
2024-02-04 06:35:19 +02:00
|
|
|
.msg = HintFmt("invalid integer '%1%'", yytext),
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
.pos = state->positions[CUR_POS],
|
2022-03-24 10:10:33 +02:00
|
|
|
});
|
libexpr: Use int64_t for NixInt
Using a 64bit integer on 32bit systems will come with a bit of a
performance overhead, but given that Nix doesn't use a lot of integers
compared to other types, I think the overhead is negligible also
considering that 32bit systems are in decline.
The biggest advantage however is that when we use a consistent integer
size across all platforms it's less likely that we miss things that we
break due to that. One example would be:
https://github.com/NixOS/nixpkgs/pull/44233
On Hydra it will evaluate, because the evaluator runs on a 64bit
machine, but when evaluating the same on a 32bit machine it will fail,
so using 64bit integers should make that consistent.
While the change of the type in value.hh is rather easy to do, we have a
few more options available for doing the conversion in the lexer:
* Via an #ifdef on the architecture and using strtol() or strtoll()
accordingly depending on which architecture we are. For the #ifdef
we would need another AX_COMPILE_CHECK_SIZEOF in configure.ac.
* Using istringstream, which would involve copying the value.
* As we're already using boost, lexical_cast might be a good idea.
Spoiler: I went for the latter, first of all because lexical_cast does
have an overload for const char* and second of all, because it doesn't
involve copying around the input string. Also, because istringstream
seems to come with a bigger overhead than boost::lexical_cast:
https://www.boost.org/doc/libs/release/doc/html/boost_lexical_cast/performance.html
The first method (still using strtol/strtoll) also wasn't something I
pursued further, because it is also locale-aware which I doubt is what
we want, given that the regex for int is [0-9]+.
Signed-off-by: aszlig <aszlig@nix.build>
Fixes: #2339
2018-08-29 01:23:51 +03:00
|
|
|
}
|
2024-01-13 02:46:48 +02:00
|
|
|
return INT_LIT;
|
2004-01-30 19:06:03 +02:00
|
|
|
}
|
2016-01-05 01:40:40 +02:00
|
|
|
{FLOAT} { errno = 0;
|
2016-01-05 10:46:37 +02:00
|
|
|
yylval->nf = strtod(yytext, 0);
|
2016-01-05 01:40:40 +02:00
|
|
|
if (errno != 0)
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
throw ParseError(ErrorInfo{
|
2024-02-04 06:35:19 +02:00
|
|
|
.msg = HintFmt("invalid float '%1%'", yytext),
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
.pos = state->positions[CUR_POS],
|
2022-03-24 10:10:33 +02:00
|
|
|
});
|
2024-01-13 02:46:48 +02:00
|
|
|
return FLOAT_LIT;
|
2016-01-05 01:40:40 +02:00
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2016-01-20 17:34:42 +02:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\} { /* State INITIAL only exists at the bottom of the stack and is
|
|
|
|
used as a marker. DEFAULT replaces it everywhere else.
|
|
|
|
Popping when in INITIAL state causes an empty stack exception,
|
|
|
|
so don't */
|
|
|
|
if (YYSTATE != INITIAL)
|
|
|
|
POP_STATE();
|
|
|
|
return '}';
|
|
|
|
}
|
|
|
|
\{ { PUSH_STATE(DEFAULT); return '{'; }
|
2014-01-06 17:27:26 +02:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\" { PUSH_STATE(STRING); return '"'; }
|
2018-02-25 23:51:47 +02:00
|
|
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
|
|
|
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
|
2017-05-01 02:07:33 +03:00
|
|
|
/* It is impossible to match strings ending with '$' with one
|
|
|
|
regex because trailing contexts are only valid at the end
|
|
|
|
of a rule. (A sane but undocumented limitation.) */
|
2024-01-15 17:52:18 +02:00
|
|
|
yylval->str = unescapeStr(state->symbols, yytext, yyleng);
|
2017-05-01 02:07:33 +03:00
|
|
|
return STR;
|
|
|
|
}
|
2016-06-14 18:42:46 +03:00
|
|
|
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2017-05-01 02:07:33 +03:00
|
|
|
<STRING>\" { POP_STATE(); return '"'; }
|
|
|
|
<STRING>\$|\\|\$\\ {
|
|
|
|
/* This can only occur when we reach EOF, otherwise the above
|
|
|
|
(...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
|
|
|
|
This is technically invalid, but we leave the problem to the
|
|
|
|
parser who fails with exact location. */
|
2022-05-25 16:45:10 +03:00
|
|
|
return EOF;
|
2017-05-01 02:07:33 +03:00
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
|
2008-02-05 15:38:07 +02:00
|
|
|
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {yytext, (size_t) yyleng, true};
|
2007-11-30 18:48:45 +02:00
|
|
|
return IND_STR;
|
2007-12-06 12:20:58 +02:00
|
|
|
}
|
2017-05-01 02:05:41 +03:00
|
|
|
<IND_STRING>\'\'\$ |
|
|
|
|
<IND_STRING>\$ {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"$", 1};
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
|
|
|
<IND_STRING>\'\'\' {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"''", 2};
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
2018-02-25 23:51:47 +02:00
|
|
|
<IND_STRING>\'\'\\{ANY} {
|
2024-01-15 17:52:18 +02:00
|
|
|
yylval->str = unescapeStr(state->symbols, yytext + 2, yyleng - 2);
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
2007-11-30 18:48:45 +02:00
|
|
|
}
|
2016-06-14 18:42:46 +03:00
|
|
|
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2015-07-02 19:39:02 +03:00
|
|
|
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
|
2008-02-05 15:38:07 +02:00
|
|
|
<IND_STRING>\' {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"'", 1};
|
2008-02-05 15:38:07 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2021-07-29 19:03:07 +03:00
|
|
|
{PATH_SEG}\$\{ |
|
|
|
|
{HPATH_START}\$\{ {
|
|
|
|
PUSH_STATE(PATH_START);
|
|
|
|
yyless(0);
|
2023-12-10 14:00:18 +02:00
|
|
|
yylloc->unstash();
|
2021-07-29 19:03:07 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
<PATH_START>{PATH_SEG} {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
<PATH_START>{HPATH_START} {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return HPATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
{PATH} {
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH;
|
|
|
|
}
|
|
|
|
{HPATH} {
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return HPATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
<INPATH,INPATH_SLASH>\$\{ {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH);
|
|
|
|
PUSH_STATE(DEFAULT);
|
|
|
|
return DOLLAR_CURLY;
|
|
|
|
}
|
|
|
|
<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ {
|
|
|
|
POP_STATE();
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return STR;
|
|
|
|
}
|
|
|
|
<INPATH>{ANY} |
|
|
|
|
<INPATH><<EOF>> {
|
|
|
|
/* if we encounter a non-path character we inform the parser that the path has
|
|
|
|
ended with a PATH_END token and re-parse this character in the default
|
|
|
|
context (it may be ')', ';', or something of that sort) */
|
|
|
|
POP_STATE();
|
|
|
|
yyless(0);
|
2023-12-10 14:00:18 +02:00
|
|
|
yylloc->unstash();
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH_END;
|
|
|
|
}
|
|
|
|
|
|
|
|
<INPATH_SLASH>{ANY} |
|
|
|
|
<INPATH_SLASH><<EOF>> {
/* INPATH_SLASH is the state entered after a path chunk whose last character
   is '/'. Reaching this rule means that chunk was followed by end of input
   or by a character not consumed by the earlier INPATH_SLASH rules (more
   path text or `${`), i.e. the path ends in a slash. This is rejected by
   throwing a ParseError positioned at CUR_POS; no token is returned. */
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
throw ParseError(ErrorInfo{
|
2024-02-04 06:35:19 +02:00
|
|
|
.msg = HintFmt("path has a trailing slash"),
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
.pos = state->positions[CUR_POS],
|
2022-03-24 10:10:33 +02:00
|
|
|
});
|
2021-07-29 19:03:07 +03:00
|
|
|
}
|
2016-01-20 17:34:42 +02:00
|
|
|
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
{SPATH} { /* text matching the SPATH definition: passed to the parser as (pointer, length) into yytext */ yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
|
|
|
|
{URI} { /* text matching the URI definition: passed to the parser as (pointer, length) into yytext */ yylval->uri = {yytext, (size_t) yyleng}; return URI; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2006-08-16 13:28:44 +03:00
|
|
|
[ \t\r\n]+ /* skip runs of spaces, tabs, CR and LF; no token is returned */
|
|
|
|
\#[^\r\n]* /* single-line comment: skip from '#' up to, but not including, the next CR or LF */
|
2016-11-13 18:06:04 +02:00
|
|
|
\/\*([^*]|\*+[^*/])*\*+\/ /* long (block) comment: skipped up to the first closing star-slash, so these comments do not nest */
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2018-05-11 13:02:19 +03:00
|
|
|
{ANY} {
/* Fallback: the single matched character is returned to the parser as a
   token whose code is the character's value. The cast to unsigned char
   keeps that value non-negative even where plain char is signed. */
|
|
|
|
/* Don't return a negative number, as this will cause
|
|
|
|
Bison to stop parsing without an error. */
|
|
|
|
return (unsigned char) yytext[0];
|
|
|
|
}
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
%%
|