nix-super/src/libexpr/nixexpr.hh

399 lines
9.9 KiB
C++
Raw Normal View History

#pragma once
2003-10-30 18:11:24 +02:00
#include "value.hh"
#include "symbol-table.hh"
2020-05-12 00:52:15 +03:00
#include "error.hh"
namespace nix {
2003-10-30 18:11:24 +02:00
MakeError(EvalError, Error);
MakeError(ParseError, Error);
MakeError(AssertionError, EvalError);
MakeError(ThrownError, AssertionError);
MakeError(Abort, EvalError);
MakeError(TypeError, EvalError);
MakeError(UndefinedVarError, Error);
MakeError(MissingArgumentError, EvalError);
MakeError(RestrictedPathError, Error);
/* Position objects. */
struct Pos
{
2020-05-21 07:18:26 +03:00
FileOrigin origin;
Symbol file;
unsigned int line, column;
Pos() : origin(foString), line(0), column(0) { }
2020-05-21 07:18:26 +03:00
Pos(FileOrigin origin, const Symbol & file, unsigned int line, unsigned int column)
: origin(origin), file(file), line(line), column(column) { }
2014-04-04 22:14:11 +03:00
operator bool() const
{
return line != 0;
}
bool operator < (const Pos & p2) const
{
if (!line) return p2.line;
if (!p2.line) return false;
int d = ((const std::string &) file).compare((const std::string &) p2.file);
if (d < 0) return true;
if (d > 0) return false;
if (line < p2.line) return true;
if (line > p2.line) return false;
return column < p2.column;
}
};
extern Pos noPos;
2010-04-13 00:21:24 +03:00
std::ostream & operator << (std::ostream & str, const Pos & pos);
struct Env;
struct Value;
2014-01-21 19:29:55 +02:00
class EvalState;
struct StaticEnv;
/* An attribute path is a sequence of attribute names. */
struct AttrName
{
Symbol symbol;
Expr * expr;
AttrName(const Symbol & s) : symbol(s) {};
AttrName(Expr * e) : expr(e) {};
};
typedef std::vector<AttrName> AttrPath;
std::string showAttrPath(const AttrPath & attrPath);
/* Abstract syntax of Nix expressions. */
struct Expr
{
2014-01-21 19:29:55 +02:00
virtual ~Expr() { };
virtual void show(std::ostream & str) const;
virtual void bindVars(const StaticEnv & env);
2010-04-13 01:03:27 +03:00
virtual void eval(EvalState & state, Env & env, Value & v);
virtual Value * maybeThunk(EvalState & state, Env & env);
virtual void setName(Symbol & name);
};
std::ostream & operator << (std::ostream & str, const Expr & e);
#define COMMON_METHODS \
void show(std::ostream & str) const; \
void eval(EvalState & state, Env & env, Value & v); \
void bindVars(const StaticEnv & env);
struct ExprInt : Expr
{
NixInt n;
Value v;
2022-01-04 19:40:39 +02:00
ExprInt(NixInt n) : n(n) { v.mkInt(n); };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
struct ExprFloat : Expr
{
NixFloat nf;
Value v;
2022-01-04 19:40:39 +02:00
ExprFloat(NixFloat nf) : nf(nf) { v.mkFloat(nf); };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
struct ExprString : Expr
{
std::string s;
Value v;
don't use Symbols for strings string expressions by and large do not need the benefits a Symbol gives us, instead they pollute the symbol table and cause unnecessary overhead for almost all strings. the one place we can think of that benefits from them (attrpaths with expressions) extracts the benefit in the parser, which we'll have to touch anyway when changing ExprString to hold strings. this gives a sizeable improvement on of 3-5% on all benchmarks we've run. before nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.844 s ± 0.045 s [User: 6.750 s, System: 1.663 s] Range (min … max): 8.758 s … 8.922 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 367.4 ms ± 3.3 ms [User: 332.3 ms, System: 35.2 ms] Range (min … max): 364.0 ms … 375.2 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.810 s ± 0.030 s [User: 2.517 s, System: 0.225 s] Range (min … max): 2.742 s … 2.854 s 20 runs after nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.533 s ± 0.068 s [User: 6.485 s, System: 1.642 s] Range (min … max): 8.404 s … 8.657 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 347.6 ms ± 3.1 ms [User: 313.1 ms, System: 34.5 ms] Range (min … max): 343.3 ms … 354.6 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.709 s ± 0.032 s [User: 2.414 s, System: 0.232 s] Range (min … max): 2.655 s … 2.788 s 20 runs
2022-01-19 15:31:30 +02:00
ExprString(std::string s) : s(std::move(s)) { v.mkString(this->s.data()); };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
struct ExprPath : Expr
{
std::string s;
Value v;
ExprPath(std::string s) : s(std::move(s)) { v.mkPath(this->s.c_str()); };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
2020-02-24 15:33:01 +02:00
typedef uint32_t Level;
typedef uint32_t Displacement;
2013-10-08 15:24:53 +03:00
struct ExprVar : Expr
{
Pos pos;
Symbol name;
/* Whether the variable comes from an environment (e.g. a rec, let
or function argument) or from a "with". */
bool fromWith;
2013-09-02 17:29:15 +03:00
/* In the former case, the value is obtained by going `level'
levels up from the current environment and getting the
`displ'th value in that environment. In the latter case, the
value is obtained by getting the attribute named `name' from
the set stored in the environment that is `level' levels up
from the current one.*/
2020-02-24 15:33:01 +02:00
Level level;
Displacement displ;
ExprVar(const Symbol & name) : name(name) { };
ExprVar(const Pos & pos, const Symbol & name) : pos(pos), name(name) { };
COMMON_METHODS
Value * maybeThunk(EvalState & state, Env & env);
};
struct ExprSelect : Expr
{
Pos pos;
Expr * e, * def;
AttrPath attrPath;
ExprSelect(const Pos & pos, Expr * e, const AttrPath & attrPath, Expr * def) : pos(pos), e(e), def(def), attrPath(attrPath) { };
ExprSelect(const Pos & pos, Expr * e, const Symbol & name) : pos(pos), e(e), def(0) { attrPath.push_back(AttrName(name)); };
COMMON_METHODS
};
2010-04-13 00:21:24 +03:00
struct ExprOpHasAttr : Expr
{
Expr * e;
AttrPath attrPath;
ExprOpHasAttr(Expr * e, const AttrPath & attrPath) : e(e), attrPath(attrPath) { };
2010-04-13 00:21:24 +03:00
COMMON_METHODS
};
struct ExprAttrs : Expr
{
bool recursive;
Pos pos;
struct AttrDef {
bool inherited;
Expr * e;
Pos pos;
2020-02-24 15:33:01 +02:00
Displacement displ; // displacement
AttrDef(Expr * e, const Pos & pos, bool inherited=false)
: inherited(inherited), e(e), pos(pos) { };
AttrDef() { };
};
typedef std::map<Symbol, AttrDef> AttrDefs;
AttrDefs attrs;
Dynamic attrs This adds new syntax for attribute names: * attrs."${name}" => getAttr name attrs * attrs ? "${name}" => isAttrs attrs && hasAttr attrs name * attrs."${name}" or def => if attrs ? "${name}" then attrs."${name}" else def * { "${name}" = value; } => listToAttrs [{ inherit name value; }] Of course, it's a bit more complicated than that. The attribute chains can be arbitrarily long and contain combinations of static and dynamic parts (e.g. attrs."${foo}".bar."${baz}" or qux), which is relatively straightforward for the getAttrs/hasAttrs cases but is more complex for the listToAttrs case due to rules about duplicate attribute definitions. For attribute sets with dynamic attribute names, duplicate static attributes are detected at parse time while duplicate dynamic attributes are detected when the attribute set is forced. So, for example, { a = null; a.b = null; "${"c"}" = true; } will be a parse-time error, while { a = {}; "${"a"}".b = null; c = true; } will be an eval-time error (technically that case could theoretically be detected at parse time, but the general case would require full evaluation). Moreover, duplicate dynamic attributes are not allowed even in cases where they would be with static attributes ({ a.b.d = true; a.b.c = false; } is legal, but { a."${"b"}".d = true; a."${"b"}".c = false; } is not). This restriction might be relaxed in the future in cases where the static variant would not be an error, but it is not obvious that that is desirable. Finally, recursive attribute sets with dynamic attributes have the static attributes in scope but not the dynamic ones. So rec { a = true; "${"b"}" = a; } is equivalent to { a = true; b = true; } but rec { "${"a"}" = true; b = a; } would be an error or use a from the surrounding scope if it exists. Note that the getAttr, getAttr or default, and hasAttr are all implemented purely in the parser as syntactic sugar, while attribute sets with dynamic attribute names required changes to the AST to be implemented cleanly. This is an alternative solution to and closes #167 Signed-off-by: Shea Levy <shea@shealevy.com>
2013-09-21 06:25:30 +03:00
struct DynamicAttrDef {
Expr * nameExpr, * valueExpr;
Dynamic attrs This adds new syntax for attribute names: * attrs."${name}" => getAttr name attrs * attrs ? "${name}" => isAttrs attrs && hasAttr attrs name * attrs."${name}" or def => if attrs ? "${name}" then attrs."${name}" else def * { "${name}" = value; } => listToAttrs [{ inherit name value; }] Of course, it's a bit more complicated than that. The attribute chains can be arbitrarily long and contain combinations of static and dynamic parts (e.g. attrs."${foo}".bar."${baz}" or qux), which is relatively straightforward for the getAttrs/hasAttrs cases but is more complex for the listToAttrs case due to rules about duplicate attribute definitions. For attribute sets with dynamic attribute names, duplicate static attributes are detected at parse time while duplicate dynamic attributes are detected when the attribute set is forced. So, for example, { a = null; a.b = null; "${"c"}" = true; } will be a parse-time error, while { a = {}; "${"a"}".b = null; c = true; } will be an eval-time error (technically that case could theoretically be detected at parse time, but the general case would require full evaluation). Moreover, duplicate dynamic attributes are not allowed even in cases where they would be with static attributes ({ a.b.d = true; a.b.c = false; } is legal, but { a."${"b"}".d = true; a."${"b"}".c = false; } is not). This restriction might be relaxed in the future in cases where the static variant would not be an error, but it is not obvious that that is desirable. Finally, recursive attribute sets with dynamic attributes have the static attributes in scope but not the dynamic ones. So rec { a = true; "${"b"}" = a; } is equivalent to { a = true; b = true; } but rec { "${"a"}" = true; b = a; } would be an error or use a from the surrounding scope if it exists. Note that the getAttr, getAttr or default, and hasAttr are all implemented purely in the parser as syntactic sugar, while attribute sets with dynamic attribute names required changes to the AST to be implemented cleanly. This is an alternative solution to and closes #167 Signed-off-by: Shea Levy <shea@shealevy.com>
2013-09-21 06:25:30 +03:00
Pos pos;
DynamicAttrDef(Expr * nameExpr, Expr * valueExpr, const Pos & pos)
: nameExpr(nameExpr), valueExpr(valueExpr), pos(pos) { };
Dynamic attrs This adds new syntax for attribute names: * attrs."${name}" => getAttr name attrs * attrs ? "${name}" => isAttrs attrs && hasAttr attrs name * attrs."${name}" or def => if attrs ? "${name}" then attrs."${name}" else def * { "${name}" = value; } => listToAttrs [{ inherit name value; }] Of course, it's a bit more complicated than that. The attribute chains can be arbitrarily long and contain combinations of static and dynamic parts (e.g. attrs."${foo}".bar."${baz}" or qux), which is relatively straightforward for the getAttrs/hasAttrs cases but is more complex for the listToAttrs case due to rules about duplicate attribute definitions. For attribute sets with dynamic attribute names, duplicate static attributes are detected at parse time while duplicate dynamic attributes are detected when the attribute set is forced. So, for example, { a = null; a.b = null; "${"c"}" = true; } will be a parse-time error, while { a = {}; "${"a"}".b = null; c = true; } will be an eval-time error (technically that case could theoretically be detected at parse time, but the general case would require full evaluation). Moreover, duplicate dynamic attributes are not allowed even in cases where they would be with static attributes ({ a.b.d = true; a.b.c = false; } is legal, but { a."${"b"}".d = true; a."${"b"}".c = false; } is not). This restriction might be relaxed in the future in cases where the static variant would not be an error, but it is not obvious that that is desirable. Finally, recursive attribute sets with dynamic attributes have the static attributes in scope but not the dynamic ones. So rec { a = true; "${"b"}" = a; } is equivalent to { a = true; b = true; } but rec { "${"a"}" = true; b = a; } would be an error or use a from the surrounding scope if it exists. Note that the getAttr, getAttr or default, and hasAttr are all implemented purely in the parser as syntactic sugar, while attribute sets with dynamic attribute names required changes to the AST to be implemented cleanly. This is an alternative solution to and closes #167 Signed-off-by: Shea Levy <shea@shealevy.com>
2013-09-21 06:25:30 +03:00
};
typedef std::vector<DynamicAttrDef> DynamicAttrDefs;
DynamicAttrDefs dynamicAttrs;
ExprAttrs(const Pos &pos) : recursive(false), pos(pos) { };
ExprAttrs() : recursive(false), pos(noPos) { };
COMMON_METHODS
};
struct ExprList : Expr
{
std::vector<Expr *> elems;
ExprList() { };
COMMON_METHODS
};
2003-10-30 18:11:24 +02:00
struct Formal
{
Pos pos;
Symbol name;
Expr * def;
Formal(const Pos & pos, const Symbol & name, Expr * def) : pos(pos), name(name), def(def) { };
};
struct Formals
{
defer formals duplicate check for incresed efficiency all round if we defer the duplicate argument check for lambda formals we can use more efficient data structures for the formals set, and we can get rid of the duplication of formals names to boot. instead of a list of formals we've seen and a set of names we'll keep a vector instead and run a sort+dupcheck step before moving the parsed formals into a newly created lambda. this improves performance on search and rebuild by ~1%, pure parsing gains more (about 4%). this does reorder lambda arguments in the xml output, but the output is still stable. this shouldn't be a problem since argument order is not semantically important anyway. before nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.550 s ± 0.060 s [User: 6.470 s, System: 1.664 s] Range (min … max): 8.435 s … 8.666 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 346.7 ms ± 2.1 ms [User: 312.4 ms, System: 34.2 ms] Range (min … max): 343.8 ms … 353.4 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.720 s ± 0.031 s [User: 2.415 s, System: 0.231 s] Range (min … max): 2.662 s … 2.780 s 20 runs after nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.462 s ± 0.063 s [User: 6.398 s, System: 1.661 s] Range (min … max): 8.339 s … 8.542 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.1 ms ± 1.4 ms [User: 296.8 ms, System: 32.3 ms] Range (min … max): 326.1 ms … 330.8 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.687 s ± 0.035 s [User: 2.392 s, System: 0.228 s] Range (min … max): 2.626 s … 2.754 s 20 runs
2022-01-19 17:49:02 +02:00
typedef std::vector<Formal> Formals_;
Formals_ formals;
bool ellipsis;
defer formals duplicate check for incresed efficiency all round if we defer the duplicate argument check for lambda formals we can use more efficient data structures for the formals set, and we can get rid of the duplication of formals names to boot. instead of a list of formals we've seen and a set of names we'll keep a vector instead and run a sort+dupcheck step before moving the parsed formals into a newly created lambda. this improves performance on search and rebuild by ~1%, pure parsing gains more (about 4%). this does reorder lambda arguments in the xml output, but the output is still stable. this shouldn't be a problem since argument order is not semantically important anyway. before nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.550 s ± 0.060 s [User: 6.470 s, System: 1.664 s] Range (min … max): 8.435 s … 8.666 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 346.7 ms ± 2.1 ms [User: 312.4 ms, System: 34.2 ms] Range (min … max): 343.8 ms … 353.4 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.720 s ± 0.031 s [User: 2.415 s, System: 0.231 s] Range (min … max): 2.662 s … 2.780 s 20 runs after nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 8.462 s ± 0.063 s [User: 6.398 s, System: 1.661 s] Range (min … max): 8.339 s … 8.542 s 20 runs nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.1 ms ± 1.4 ms [User: 296.8 ms, System: 32.3 ms] Range (min … max): 326.1 ms … 330.8 ms 20 runs nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' Time (mean ± σ): 2.687 s ± 0.035 s [User: 2.392 s, System: 0.228 s] Range (min … max): 2.626 s … 2.754 s 20 runs
2022-01-19 17:49:02 +02:00
bool has(Symbol arg) const {
auto it = std::lower_bound(formals.begin(), formals.end(), arg,
[] (const Formal & f, const Symbol & sym) { return f.name < sym; });
return it != formals.end() && it->name == arg;
}
std::vector<Formal> lexicographicOrder() const
{
std::vector<Formal> result(formals.begin(), formals.end());
std::sort(result.begin(), result.end(),
[] (const Formal & a, const Formal & b) {
return std::string_view(a.name) < std::string_view(b.name);
});
return result;
}
};
struct ExprLambda : Expr
{
Pos pos;
Symbol name;
Symbol arg;
Formals * formals;
Expr * body;
ExprLambda(const Pos & pos, const Symbol & arg, Formals * formals, Expr * body)
: pos(pos), arg(arg), formals(formals), body(body)
{
2020-04-29 19:14:32 +03:00
};
void setName(Symbol & name);
std::string showNamePos() const;
inline bool hasFormals() const { return formals != nullptr; }
COMMON_METHODS
};
struct ExprCall : Expr
{
Expr * fun;
std::vector<Expr *> args;
Pos pos;
ExprCall(const Pos & pos, Expr * fun, std::vector<Expr *> && args)
: fun(fun), args(args), pos(pos)
{ }
COMMON_METHODS
};
struct ExprLet : Expr
{
ExprAttrs * attrs;
Expr * body;
ExprLet(ExprAttrs * attrs, Expr * body) : attrs(attrs), body(body) { };
COMMON_METHODS
};
struct ExprWith : Expr
{
Pos pos;
Expr * attrs, * body;
2018-05-02 14:56:34 +03:00
size_t prevWith;
ExprWith(const Pos & pos, Expr * attrs, Expr * body) : pos(pos), attrs(attrs), body(body) { };
COMMON_METHODS
};
struct ExprIf : Expr
{
2020-04-09 10:45:15 +03:00
Pos pos;
Expr * cond, * then, * else_;
2020-04-09 10:45:15 +03:00
ExprIf(const Pos & pos, Expr * cond, Expr * then, Expr * else_) : pos(pos), cond(cond), then(then), else_(else_) { };
COMMON_METHODS
};
2010-04-13 00:21:24 +03:00
struct ExprAssert : Expr
{
Pos pos;
Expr * cond, * body;
ExprAssert(const Pos & pos, Expr * cond, Expr * body) : pos(pos), cond(cond), body(body) { };
COMMON_METHODS
};
struct ExprOpNot : Expr
{
Expr * e;
ExprOpNot(Expr * e) : e(e) { };
COMMON_METHODS
};
#define MakeBinOp(name, s) \
struct name : Expr \
{ \
Pos pos; \
Expr * e1, * e2; \
name(Expr * e1, Expr * e2) : e1(e1), e2(e2) { }; \
name(const Pos & pos, Expr * e1, Expr * e2) : pos(pos), e1(e1), e2(e2) { }; \
void show(std::ostream & str) const \
{ \
2014-10-20 09:44:32 +03:00
str << "(" << *e1 << " " s " " << *e2 << ")"; \
} \
void bindVars(const StaticEnv & env) \
{ \
e1->bindVars(env); e2->bindVars(env); \
} \
void eval(EvalState & state, Env & env, Value & v); \
};
MakeBinOp(ExprOpEq, "==")
MakeBinOp(ExprOpNEq, "!=")
MakeBinOp(ExprOpAnd, "&&")
MakeBinOp(ExprOpOr, "||")
MakeBinOp(ExprOpImpl, "->")
MakeBinOp(ExprOpUpdate, "//")
MakeBinOp(ExprOpConcatLists, "++")
2010-04-13 00:21:24 +03:00
struct ExprConcatStrings : Expr
{
Pos pos;
bool forceString;
2022-02-21 17:32:34 +02:00
std::vector<std::pair<Pos, Expr *> > * es;
ExprConcatStrings(const Pos & pos, bool forceString, std::vector<std::pair<Pos, Expr *> > * es)
: pos(pos), forceString(forceString), es(es) { };
2010-04-13 00:21:24 +03:00
COMMON_METHODS
};
struct ExprPos : Expr
{
Pos pos;
ExprPos(const Pos & pos) : pos(pos) { };
COMMON_METHODS
};
2010-04-08 14:41:19 +03:00
/* Static environments are used to map variable names onto (level,
displacement) pairs used to obtain the value of the variable at
runtime. */
struct StaticEnv
{
bool isWith;
const StaticEnv * up;
// Note: these must be in sorted order.
2020-02-24 15:33:01 +02:00
typedef std::vector<std::pair<Symbol, Displacement>> Vars;
Vars vars;
StaticEnv(bool isWith, const StaticEnv * up, size_t expectedSize = 0) : isWith(isWith), up(up) {
vars.reserve(expectedSize);
};
void sort()
{
std::stable_sort(vars.begin(), vars.end(),
[](const Vars::value_type & a, const Vars::value_type & b) { return a.first < b.first; });
}
void deduplicate()
{
auto it = vars.begin(), jt = it, end = vars.end();
while (jt != end) {
*it = *jt++;
while (jt != end && it->first == jt->first) *it = *jt++;
it++;
}
vars.erase(it, end);
}
Vars::const_iterator find(const Symbol & name) const
{
Vars::value_type key(name, 0);
auto i = std::lower_bound(vars.begin(), vars.end(), key);
if (i != vars.end() && i->first == name) return i;
return vars.end();
}
};
}