2004-01-30 17:21:42 +02:00
|
|
|
%option reentrant bison-bridge bison-locations
|
|
|
|
%option noyywrap
|
|
|
|
%option never-interactive
|
2015-07-02 19:39:02 +03:00
|
|
|
%option stack
|
|
|
|
%option nodefault
|
|
|
|
%option nounput noyy_top_state
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
%s DEFAULT
|
2006-05-01 17:01:47 +03:00
|
|
|
%x STRING
|
2007-11-30 18:48:45 +02:00
|
|
|
%x IND_STRING
|
2021-07-29 19:03:07 +03:00
|
|
|
%x INPATH
|
|
|
|
%x INPATH_SLASH
|
|
|
|
%x PATH_START
|
2006-05-01 17:01:47 +03:00
|
|
|
|
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
%{
|
2020-12-02 15:33:20 +02:00
|
|
|
#ifdef __clang__
|
2020-12-01 15:57:56 +02:00
|
|
|
#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
|
2020-12-02 15:33:20 +02:00
|
|
|
#endif
|
2020-12-01 15:57:56 +02:00
|
|
|
|
libexpr: Use int64_t for NixInt
Using a 64bit integer on 32bit systems will come with a bit of a
performance overhead, but given that Nix doesn't use a lot of integers
compared to other types, I think the overhead is negligible also
considering that 32bit systems are in decline.
The biggest advantage however is that when we use a consistent integer
size across all platforms it's less likely that we miss things that we
break due to that. One example would be:
https://github.com/NixOS/nixpkgs/pull/44233
On Hydra it will evaluate, because the evaluator runs on a 64bit
machine, but when evaluating the same on a 32bit machine it will fail,
so using 64bit integers should make that consistent.
While the change of the type in value.hh is rather easy to do, we have a
few more options available for doing the conversion in the lexer:
* Via an #ifdef on the architecture and using strtol() or strtoll()
accordingly depending on which architecture we are. For the #ifdef
we would need another AX_COMPILE_CHECK_SIZEOF in configure.ac.
* Using istringstream, which would involve copying the value.
* As we're already using boost, lexical_cast might be a good idea.
Spoiler: I went for the latter, first of all because lexical_cast does
have an overload for const char* and second of all, because it doesn't
involve copying around the input string. Also, because istringstream
seems to come with a bigger overhead than boost::lexical_cast:
https://www.boost.org/doc/libs/release/doc/html/boost_lexical_cast/performance.html
The first method (still using strtol/strtoll) also wasn't something I
pursued further, because it is also locale-aware which I doubt is what
we want, given that the regex for int is [0-9]+.
Signed-off-by: aszlig <aszlig@nix.build>
Fixes: #2339
2018-08-29 01:23:51 +03:00
|
|
|
#include <boost/lexical_cast.hpp>
|
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
#include "nixexpr.hh"
|
2006-09-05 00:06:23 +03:00
|
|
|
#include "parser-tab.hh"
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
using namespace nix;
|
|
|
|
|
|
|
|
namespace nix {
|
|
|
|
|
2021-09-30 02:37:51 +03:00
|
|
|
// backup to recover from yyless(0)
|
|
|
|
YYLTYPE prev_yylloc;
|
2013-09-02 17:29:15 +03:00
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
/* Reset a location so that both its start and its end point at line 1,
   column 1. */
static void initLoc(YYLTYPE * loc)
{
    loc->first_line = 1;
    loc->last_line = 1;
    loc->first_column = 1;
    loc->last_column = 1;
}
|
|
|
|
|
2013-09-02 17:29:15 +03:00
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
/* Advance `loc` over the `len` bytes of text starting at `s`.

   The previous end position becomes the new start position, and the end
   position is then moved forward byte by byte: a line break ("\n", "\r" or
   the pair "\r\n") bumps the line counter and resets the column to 1, any
   other byte bumps the column.

   Before anything is modified the old location is stored in `prev_yylloc`,
   which rules calling yyless(0) use to undo the adjustment. */
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
{
    prev_yylloc = *loc;

    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;

    for (size_t i = 0; i < len; i++) {
        switch (*s++) {
        case '\r':
            /* Only peek at the following byte while it is still part of the
               `len` bytes handed to us; never rely on a terminator. */
            if (i + 1 < len && *s == '\n') { /* cr/lf */
                i++;
                s++;
            }
            /* fall through */
        case '\n':
            ++loc->last_line;
            loc->last_column = 1;
            break;
        default:
            ++loc->last_column;
        }
    }
}
|
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
|
2021-12-21 11:28:05 +02:00
|
|
|
// we make use of the fact that the parser receives a private copy of the input
|
|
|
|
// string and can munge around in it.
|
2022-01-19 14:39:42 +02:00
|
|
|
/* Unescape a string literal in place and return a token describing the
   rewritten bytes (start pointer plus length).

   The text at `s` is overwritten: the output cursor never runs ahead of the
   input cursor, so no extra storage is needed.  Translations performed:
     - backslash followed by 'n', 'r' or 't' becomes LF, CR or TAB;
     - backslash followed by any other character becomes that character;
     - a bare CR, or a CR/LF pair, becomes a single LF.

   Scanning stops at the first NUL byte; `symbols` and `length` are not
   consulted. */
static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
{
    char * const begin = s;
    char * out = s;

    // The input is terminated with *two* NULs, so reading the character that
    // follows a backslash can never run off the end of the buffer.
    for (char ch = *s++; ch != '\0'; ch = *s++) {
        switch (ch) {
        case '\\': {
            const char escaped = *s++;
            switch (escaped) {
            case 'n': *out = '\n'; break;
            case 'r': *out = '\r'; break;
            case 't': *out = '\t'; break;
            default:  *out = escaped; break;
            }
            break;
        }
        case '\r':
            /* Normalise CR and CR/LF into LF. */
            *out = '\n';
            if (*s == '\n')
                s++; /* swallow the LF of a cr/lf pair */
            break;
        default:
            *out = ch;
            break;
        }
        out++;
    }

    return {begin, size_t(out - begin)};
}
|
|
|
|
|
2013-09-02 17:29:15 +03:00
|
|
|
|
2006-09-05 00:36:15 +03:00
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
#define YY_USER_INIT initLoc(yylloc)
|
|
|
|
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
|
|
|
|
|
2015-07-02 19:39:02 +03:00
|
|
|
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
|
|
|
|
#define POP_STATE() yy_pop_state(yyscanner)
|
|
|
|
|
2004-01-30 17:21:42 +02:00
|
|
|
%}
|
|
|
|
|
|
|
|
|
2018-02-25 23:51:47 +02:00
|
|
|
ANY .|\n
|
2012-09-27 22:43:08 +03:00
|
|
|
ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
|
2004-01-30 17:21:42 +02:00
|
|
|
INT [0-9]+
|
2016-01-05 10:54:49 +02:00
|
|
|
FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
|
2021-07-29 19:03:07 +03:00
|
|
|
PATH_CHAR [a-zA-Z0-9\.\_\-\+]
|
|
|
|
PATH {PATH_CHAR}*(\/{PATH_CHAR}+)+\/?
|
|
|
|
PATH_SEG {PATH_CHAR}*\/
|
|
|
|
HPATH \~(\/{PATH_CHAR}+)+\/?
|
|
|
|
HPATH_START \~\/
|
|
|
|
SPATH \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\>
|
2017-11-14 16:10:52 +02:00
|
|
|
URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
|
|
|
|
if { return IF; }
|
|
|
|
then { return THEN; }
|
|
|
|
else { return ELSE; }
|
|
|
|
assert { return ASSERT; }
|
2004-10-25 19:54:56 +03:00
|
|
|
with { return WITH; }
|
2004-01-30 17:21:42 +02:00
|
|
|
let { return LET; }
|
2006-10-02 18:52:44 +03:00
|
|
|
in { return IN; }
|
2004-01-30 17:21:42 +02:00
|
|
|
rec { return REC; }
|
2004-02-02 23:39:33 +02:00
|
|
|
inherit { return INHERIT; }
|
2011-07-13 15:19:57 +03:00
|
|
|
or { return OR_KW; }
|
2008-08-14 17:00:44 +03:00
|
|
|
\.\.\. { return ELLIPSIS; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
\=\= { return EQ; }
|
|
|
|
\!\= { return NEQ; }
|
2013-08-02 19:39:40 +03:00
|
|
|
\<\= { return LEQ; }
|
|
|
|
\>\= { return GEQ; }
|
2004-01-30 17:21:42 +02:00
|
|
|
\&\& { return AND; }
|
|
|
|
\|\| { return OR; }
|
|
|
|
\-\> { return IMPL; }
|
2004-02-04 18:49:51 +02:00
|
|
|
\/\/ { return UPDATE; }
|
2005-07-25 18:05:34 +03:00
|
|
|
\+\+ { return CONCAT; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; }
|
2013-08-19 13:35:03 +03:00
|
|
|
{INT} { errno = 0;
|
libexpr: Use int64_t for NixInt
Using a 64bit integer on 32bit systems will come with a bit of a
performance overhead, but given that Nix doesn't use a lot of integers
compared to other types, I think the overhead is negligible also
considering that 32bit systems are in decline.
The biggest advantage however is that when we use a consistent integer
size across all platforms it's less likely that we miss things that we
break due to that. One example would be:
https://github.com/NixOS/nixpkgs/pull/44233
On Hydra it will evaluate, because the evaluator runs on a 64bit
machine, but when evaluating the same on a 32bit machine it will fail,
so using 64bit integers should make that consistent.
While the change of the type in value.hh is rather easy to do, we have a
few more options available for doing the conversion in the lexer:
* Via an #ifdef on the architecture and using strtol() or strtoll()
accordingly depending on which architecture we are. For the #ifdef
we would need another AX_COMPILE_CHECK_SIZEOF in configure.ac.
* Using istringstream, which would involve copying the value.
* As we're already using boost, lexical_cast might be a good idea.
Spoiler: I went for the latter, first of all because lexical_cast does
have an overload for const char* and second of all, because it doesn't
involve copying around the input string. Also, because istringstream
seems to come with a bigger overhead than boost::lexical_cast:
https://www.boost.org/doc/libs/release/doc/html/boost_lexical_cast/performance.html
The first method (still using strtol/strtoll) also wasn't something I
pursued further, because it is also locale-aware which I doubt is what
we want, given that the regex for int is [0-9]+.
Signed-off-by: aszlig <aszlig@nix.build>
Fixes: #2339
2018-08-29 01:23:51 +03:00
|
|
|
try {
|
|
|
|
yylval->n = boost::lexical_cast<int64_t>(yytext);
|
|
|
|
} catch (const boost::bad_lexical_cast &) {
|
2020-04-23 00:00:11 +03:00
|
|
|
throw ParseError("invalid integer '%1%'", yytext);
|
libexpr: Use int64_t for NixInt
Using a 64bit integer on 32bit systems will come with a bit of a
performance overhead, but given that Nix doesn't use a lot of integers
compared to other types, I think the overhead is negligible also
considering that 32bit systems are in decline.
The biggest advantage however is that when we use a consistent integer
size across all platforms it's less likely that we miss things that we
break due to that. One example would be:
https://github.com/NixOS/nixpkgs/pull/44233
On Hydra it will evaluate, because the evaluator runs on a 64bit
machine, but when evaluating the same on a 32bit machine it will fail,
so using 64bit integers should make that consistent.
While the change of the type in value.hh is rather easy to do, we have a
few more options available for doing the conversion in the lexer:
* Via an #ifdef on the architecture and using strtol() or strtoll()
accordingly depending on which architecture we are. For the #ifdef
we would need another AX_COMPILE_CHECK_SIZEOF in configure.ac.
* Using istringstream, which would involve copying the value.
* As we're already using boost, lexical_cast might be a good idea.
Spoiler: I went for the latter, first of all because lexical_cast does
have an overload for const char* and second of all, because it doesn't
involve copying around the input string. Also, because istringstream
seems to come with a bigger overhead than boost::lexical_cast:
https://www.boost.org/doc/libs/release/doc/html/boost_lexical_cast/performance.html
The first method (still using strtol/strtoll) also wasn't something I
pursued further, because it is also locale-aware which I doubt is what
we want, given that the regex for int is [0-9]+.
Signed-off-by: aszlig <aszlig@nix.build>
Fixes: #2339
2018-08-29 01:23:51 +03:00
|
|
|
}
|
2004-01-30 19:06:03 +02:00
|
|
|
return INT;
|
|
|
|
}
|
2016-01-05 01:40:40 +02:00
|
|
|
{FLOAT} { errno = 0;
|
2016-01-05 10:46:37 +02:00
|
|
|
yylval->nf = strtod(yytext, 0);
|
2016-01-05 01:40:40 +02:00
|
|
|
if (errno != 0)
|
2020-04-23 00:00:11 +03:00
|
|
|
throw ParseError("invalid float '%1%'", yytext);
|
2016-01-05 01:40:40 +02:00
|
|
|
return FLOAT;
|
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2016-01-20 17:34:42 +02:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\} { /* State INITIAL only exists at the bottom of the stack and is
|
|
|
|
used as a marker. DEFAULT replaces it everywhere else.
|
|
|
|
Popping when in INITIAL state causes an empty stack exception,
|
|
|
|
so don't */
|
|
|
|
if (YYSTATE != INITIAL)
|
|
|
|
POP_STATE();
|
|
|
|
return '}';
|
|
|
|
}
|
|
|
|
\{ { PUSH_STATE(DEFAULT); return '{'; }
|
2014-01-06 17:27:26 +02:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\" { PUSH_STATE(STRING); return '"'; }
|
2018-02-25 23:51:47 +02:00
|
|
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
|
|
|
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
|
2017-05-01 02:07:33 +03:00
|
|
|
/* It is impossible to match strings ending with '$' with one
|
|
|
|
regex because trailing contexts are only valid at the end
|
|
|
|
of a rule. (A sane but undocumented limitation.) */
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = unescapeStr(data->symbols, yytext, yyleng);
|
2017-05-01 02:07:33 +03:00
|
|
|
return STR;
|
|
|
|
}
|
2016-06-14 18:42:46 +03:00
|
|
|
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2017-05-01 02:07:33 +03:00
|
|
|
<STRING>\" { POP_STATE(); return '"'; }
|
|
|
|
<STRING>\$|\\|\$\\ {
                  /* This can only occur when we reach EOF, otherwise the above
                     (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
                     This is technically invalid, but we leave the problem to the
                     parser, which fails with the exact location.

                     The matched text is still published as the token value so
                     that the parser never picks up a stale semantic value left
                     behind by an earlier token. */
                  yylval->str = {yytext, (size_t) yyleng};
                  return STR;
                }
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2016-06-14 18:42:46 +03:00
|
|
|
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
|
2008-02-05 15:38:07 +02:00
|
|
|
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {yytext, (size_t) yyleng, true};
|
2007-11-30 18:48:45 +02:00
|
|
|
return IND_STR;
|
2007-12-06 12:20:58 +02:00
|
|
|
}
|
2017-05-01 02:05:41 +03:00
|
|
|
<IND_STRING>\'\'\$ |
|
|
|
|
<IND_STRING>\$ {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"$", 1};
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
|
|
|
<IND_STRING>\'\'\' {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"''", 2};
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
2018-02-25 23:51:47 +02:00
|
|
|
<IND_STRING>\'\'\\{ANY} {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
|
2007-12-06 12:20:58 +02:00
|
|
|
return IND_STR;
|
2007-11-30 18:48:45 +02:00
|
|
|
}
|
2016-06-14 18:42:46 +03:00
|
|
|
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
2015-07-02 19:39:02 +03:00
|
|
|
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
|
2008-02-05 15:38:07 +02:00
|
|
|
<IND_STRING>\' {
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {"'", 1};
|
2008-02-05 15:38:07 +02:00
|
|
|
return IND_STR;
|
|
|
|
}
|
2006-05-01 17:01:47 +03:00
|
|
|
|
2021-07-29 19:03:07 +03:00
|
|
|
{PATH_SEG}\$\{ |
|
|
|
|
{HPATH_START}\$\{ {
|
|
|
|
PUSH_STATE(PATH_START);
|
|
|
|
yyless(0);
|
2021-09-30 02:37:51 +03:00
|
|
|
*yylloc = prev_yylloc;
|
2021-07-29 19:03:07 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
<PATH_START>{PATH_SEG} {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
<PATH_START>{HPATH_START} {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return HPATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
{PATH} {
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH;
|
|
|
|
}
|
|
|
|
{HPATH} {
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
yylval->path = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return HPATH;
|
|
|
|
}
|
|
|
|
|
|
|
|
<INPATH,INPATH_SLASH>\$\{ {
|
|
|
|
POP_STATE();
|
|
|
|
PUSH_STATE(INPATH);
|
|
|
|
PUSH_STATE(DEFAULT);
|
|
|
|
return DOLLAR_CURLY;
|
|
|
|
}
|
|
|
|
<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ {
|
|
|
|
POP_STATE();
|
|
|
|
if (yytext[yyleng-1] == '/')
|
|
|
|
PUSH_STATE(INPATH_SLASH);
|
|
|
|
else
|
|
|
|
PUSH_STATE(INPATH);
|
2022-01-19 14:39:42 +02:00
|
|
|
yylval->str = {yytext, (size_t) yyleng};
|
2021-07-29 19:03:07 +03:00
|
|
|
return STR;
|
|
|
|
}
|
|
|
|
<INPATH>{ANY} |
|
|
|
|
<INPATH><<EOF>> {
|
|
|
|
/* if we encounter a non-path character we inform the parser that the path has
|
|
|
|
ended with a PATH_END token and re-parse this character in the default
|
|
|
|
context (it may be ')', ';', or something of that sort) */
|
|
|
|
POP_STATE();
|
|
|
|
yyless(0);
|
2021-09-30 02:37:51 +03:00
|
|
|
*yylloc = prev_yylloc;
|
2021-07-29 19:03:07 +03:00
|
|
|
return PATH_END;
|
|
|
|
}
|
|
|
|
|
|
|
|
<INPATH_SLASH>{ANY} |
|
|
|
|
<INPATH_SLASH><<EOF>> {
|
|
|
|
throw ParseError("path has a trailing slash");
|
|
|
|
}
|
2016-01-20 17:34:42 +02:00
|
|
|
|
don't strdup tokens in the lexer
every stringy token the lexer returns is turned into a Symbol and not
used further, so we don't have to strdup. using a string_view is
sufficient, but due to limitations of the current parser we have to use
a POD type that holds the same information.
gives ~2% on system build, 6% on search, 8% on parsing alone
# before
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms]
Range (min … max): 606.6 ms … 617.3 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms]
Range (min … max): 428.2 ms … 434.2 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s]
Range (min … max): 3.023 s … 3.041 s 50 runs
# after
Benchmark 1: nix search --offline nixpkgs hello
Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms]
Range (min … max): 569.2 ms … 580.7 ms 50 runs
Benchmark 2: nix eval -f hackage-packages.nix
Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms]
Range (min … max): 392.7 ms … 395.7 ms 50 runs
Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s]
Range (min … max): 2.966 s … 2.990 s 50 runs
2021-12-21 10:17:31 +02:00
|
|
|
{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
|
|
|
|
{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2006-08-16 13:28:44 +03:00
|
|
|
[ \t\r\n]+ /* eat up whitespace */
|
|
|
|
\#[^\r\n]* /* single-line comments */
|
2016-11-13 18:06:04 +02:00
|
|
|
\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */
|
2004-01-30 17:21:42 +02:00
|
|
|
|
2018-05-11 13:02:19 +03:00
|
|
|
{ANY} {
|
|
|
|
/* Don't return a negative number, as this will cause
|
|
|
|
Bison to stop parsing without an error. */
|
|
|
|
return (unsigned char) yytext[0];
|
|
|
|
}
|
2004-01-30 17:21:42 +02:00
|
|
|
|
|
|
|
%%
|