2024-01-15 17:52:18 +02:00
|
|
|
#pragma once
|
2024-01-27 06:11:31 +02:00
|
|
|
///@file
|
2024-01-15 17:52:18 +02:00
|
|
|
|
2024-07-08 18:39:26 +03:00
|
|
|
#include <limits>
|
|
|
|
|
2024-01-15 17:52:18 +02:00
|
|
|
#include "eval.hh"
|
|
|
|
|
|
|
|
namespace nix {
|
|
|
|
|
2024-01-27 06:11:31 +02:00
|
|
|
/**
 * A (pointer, length) pair describing a piece of string text, implicitly
 * convertible to `std::string_view`.
 *
 * `hasIndentation` is read by `ParserState::stripIndentation`: tokens whose
 * flag is false are not scanned for leading whitespace there.
 *
 * @note Storing a C-style `char *` and `size_t` allows us to avoid
 * having to define the special members that using string_view here
 * would implicitly delete.
 */
struct StringToken
{
    /** Start of the text; the struct itself never frees it. */
    const char * p;
    /** Number of characters starting at `p`. */
    size_t l;
    bool hasIndentation;

    /** View the `l` characters starting at `p`. */
    operator std::string_view() const
    {
        return std::string_view(p, l);
    }
};
|
|
|
|
|
2024-01-27 06:11:31 +02:00
|
|
|
/**
 * Location record: a begin/end offset pair plus a stashed copy of that pair.
 */
struct ParserLocation
{
    int beginOffset;
    int endOffset;

    // backup to recover from yyless(0)
    int stashedBeginOffset;
    int stashedEndOffset;

    /** Copy the current offsets into the stash fields. */
    void stash()
    {
        stashedEndOffset = endOffset;
        stashedBeginOffset = beginOffset;
    }

    /** Overwrite the current offsets with the stashed ones. */
    void unstash()
    {
        endOffset = stashedEndOffset;
        beginOffset = stashedBeginOffset;
    }

    /** Latest doc comment position, or 0. */
    int doc_comment_first_column;
    int doc_comment_last_column;
};
|
|
|
|
|
|
|
|
struct LexerState
|
|
|
|
{
|
|
|
|
/**
|
|
|
|
* Tracks the distance to the last doc comment, in terms of lexer tokens.
|
|
|
|
*
|
|
|
|
* The lexer sets this to 0 when reading a doc comment, and increments it
|
|
|
|
* for every matched rule; see `lexer-helpers.cc`.
|
|
|
|
* Whitespace and comment rules decrement the distance, so that they result
|
|
|
|
* in a net 0 change in distance.
|
|
|
|
*/
|
|
|
|
int docCommentDistance = std::numeric_limits<int>::max();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The location of the last doc comment.
|
|
|
|
*
|
|
|
|
* (stashing fields are not used)
|
|
|
|
*/
|
|
|
|
ParserLocation lastDocCommentLoc;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Maps some positions to a DocComment, where the comment is relevant to the location.
|
|
|
|
*/
|
2024-07-15 20:33:56 +03:00
|
|
|
std::map<PosIdx, DocComment> & positionToDocComment;
|
2024-07-08 18:39:26 +03:00
|
|
|
|
|
|
|
PosTable & positions;
|
|
|
|
PosTable::Origin origin;
|
|
|
|
|
|
|
|
PosIdx at(const ParserLocation & loc);
|
2024-01-15 17:52:18 +02:00
|
|
|
};
|
|
|
|
|
2024-01-27 06:11:31 +02:00
|
|
|
struct ParserState
|
|
|
|
{
|
2024-07-08 18:39:26 +03:00
|
|
|
const LexerState & lexerState;
|
2024-01-15 17:52:18 +02:00
|
|
|
SymbolTable & symbols;
|
2024-01-15 17:52:18 +02:00
|
|
|
PosTable & positions;
|
2024-01-15 17:52:18 +02:00
|
|
|
Expr * result;
|
|
|
|
SourcePath basePath;
|
|
|
|
PosTable::Origin origin;
|
2024-05-03 13:14:01 +03:00
|
|
|
const ref<SourceAccessor> rootFS;
|
2024-01-15 17:52:18 +02:00
|
|
|
const Expr::AstSymbols & s;
|
2024-06-14 19:41:09 +03:00
|
|
|
const EvalSettings & settings;
|
2024-01-15 17:52:18 +02:00
|
|
|
|
|
|
|
void dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos);
|
|
|
|
void dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos);
|
|
|
|
void addAttr(ExprAttrs * attrs, AttrPath && attrPath, Expr * e, const PosIdx pos);
|
|
|
|
Formals * validateFormals(Formals * formals, PosIdx pos = noPos, Symbol arg = {});
|
|
|
|
Expr * stripIndentation(const PosIdx pos,
|
|
|
|
std::vector<std::pair<PosIdx, std::variant<Expr *, StringToken>>> && es);
|
2024-01-15 17:52:18 +02:00
|
|
|
PosIdx at(const ParserLocation & loc);
|
2024-01-15 17:52:18 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
inline void ParserState::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos)
|
|
|
|
{
|
|
|
|
throw ParseError({
|
2024-02-04 06:35:19 +02:00
|
|
|
.msg = HintFmt("attribute '%1%' already defined at %2%",
|
2024-01-15 17:52:18 +02:00
|
|
|
showAttrPath(symbols, attrPath), positions[prevPos]),
|
libexpr: Support structured error classes
While preparing PRs like #9753, I've had to change error messages in
dozens of code paths. It would be nice if instead of
EvalError("expected 'boolean' but found '%1%'", showType(v))
we could write
TypeError(v, "boolean")
or similar. Then, changing the error message could be a mechanical
refactor with the compiler pointing out places the constructor needs to
be changed, rather than the error-prone process of grepping through the
codebase. Structured errors would also help prevent the "same" error
from having multiple slightly different messages, and could be a first
step towards error codes / an error index.
This PR reworks the exception infrastructure in `libexpr` to
support exception types with different constructor signatures than
`BaseError`. Actually refactoring the exceptions to use structured data
will come in a future PR (this one is big enough already, as it has to
touch every exception in `libexpr`).
The core design is in `eval-error.hh`. Generally, errors like this:
state.error("'%s' is not a string", getAttrPathStr())
.debugThrow<TypeError>()
are transformed like this:
state.error<TypeError>("'%s' is not a string", getAttrPathStr())
.debugThrow()
The type annotation has moved from `ErrorBuilder::debugThrow` to
`EvalState::error`.
2024-01-23 03:08:29 +02:00
|
|
|
.pos = positions[pos]
|
2024-01-15 17:52:18 +02:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Report a duplicate attribute name.
 *
 * Always throws a `ParseError` positioned at `pos`; the message names the
 * attribute and the position `prevPos` of the earlier definition.
 */
inline void ParserState::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos)
{
    auto message = HintFmt(
        "attribute '%1%' already defined at %2%",
        symbols[attr],
        positions[prevPos]);

    throw ParseError({
        .msg = std::move(message),
        .pos = positions[pos]
    });
}
|
|
|
|
|
|
|
|
/**
 * Insert `e` into `attrs` at the (possibly nested) path `attrPath`.
 *
 * Every path component except the last is resolved to a nested `ExprAttrs`,
 * which is created when missing. The last component then receives `e`. If
 * that name is already bound and both the existing value and `e` are
 * attribute sets, the two sets are merged; any other clash is reported
 * through `dupAttr`, which throws `ParseError`.
 */
inline void ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, Expr * e, const PosIdx pos)
{
    // All attrpaths have at least one attr
    assert(!attrPath.empty());

    const auto leaf = attrPath.end() - 1;

    // Checking attrPath validity.
    // ===========================
    for (auto step = attrPath.begin(); step != leaf; ++step) {
        if (!step->symbol) {
            // Dynamic component: always introduces a fresh nested set.
            auto * nested = new ExprAttrs;
            attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(step->expr, nested, pos));
            attrs = nested;
            continue;
        }

        auto found = attrs->attrs.find(step->symbol);
        if (found == attrs->attrs.end()) {
            auto * nested = new ExprAttrs;
            attrs->attrs[step->symbol] = ExprAttrs::AttrDef(nested, pos);
            attrs = nested;
            continue;
        }

        // The name exists already: we can only descend into it if it is a
        // non-inherited attribute set.
        if (found->second.kind == ExprAttrs::AttrDef::Kind::Inherited)
            dupAttr(attrPath, pos, found->second.pos);
        auto * existing = dynamic_cast<ExprAttrs *>(found->second.e);
        if (!existing)
            dupAttr(attrPath, pos, found->second.pos);
        attrs = existing;
    }

    // Expr insertion.
    // ==========================
    if (!leaf->symbol) {
        attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(leaf->expr, e, pos));
        return;
    }

    auto slot = attrs->attrs.find(leaf->symbol);
    if (slot == attrs->attrs.end()) {
        // This attr path is not defined. Let's create it.
        attrs->attrs.emplace(leaf->symbol, ExprAttrs::AttrDef(e, pos));
        e->setName(leaf->symbol);
        return;
    }

    // This attr path is already defined. However, if both e and the expr
    // pointed to by the attr path are attribute sets, we want to merge them.
    // Otherwise, throw an error.
    auto * incoming = dynamic_cast<ExprAttrs *>(e);
    auto * merged = dynamic_cast<ExprAttrs *>(slot->second.e);
    if (!merged || !incoming) {
        dupAttr(attrPath, pos, slot->second.pos);
        return; // not reached: dupAttr always throws
    }

    if (incoming->inheritFromExprs && !merged->inheritFromExprs)
        merged->inheritFromExprs = std::make_unique<std::vector<Expr *>>();

    for (auto & [name, def] : incoming->attrs) {
        auto clash = merged->attrs.find(name);
        if (clash != merged->attrs.end()) // Attr already defined in merged, error.
            dupAttr(name, clash->second.pos, def.pos);
        merged->attrs.emplace(name, def);
        if (def.kind == ExprAttrs::AttrDef::Kind::InheritedFrom) {
            // The incoming inherit-from expressions get appended below, after
            // the ones `merged` already has, so shift the displacement by the
            // current count.
            auto & sel = dynamic_cast<ExprSelect &>(*def.e);
            auto & from = dynamic_cast<ExprInheritFrom &>(*sel.e);
            from.displ += merged->inheritFromExprs->size();
        }
    }

    merged->dynamicAttrs.insert(
        merged->dynamicAttrs.end(),
        incoming->dynamicAttrs.begin(),
        incoming->dynamicAttrs.end());

    if (incoming->inheritFromExprs) {
        merged->inheritFromExprs->insert(
            merged->inheritFromExprs->end(),
            incoming->inheritFromExprs->begin(),
            incoming->inheritFromExprs->end());
    }
}
|
|
|
|
|
|
|
|
/**
 * Sort the formals by (name, position) and reject duplicates.
 *
 * @param formals Formals to validate; sorted in place and returned.
 * @param pos Position reported when `arg` clashes with a formal.
 * @param arg Optional extra argument name that must not also be a formal.
 * @return `formals`, unchanged apart from the sorting.
 * @throws ParseError if two formals share a name, or if `arg` is set and
 *         `formals->has(arg)` is true.
 */
inline Formals * ParserState::validateFormals(Formals * formals, PosIdx pos, Symbol arg)
{
    auto & entries = formals->formals;

    std::sort(entries.begin(), entries.end(),
        [] (const auto & lhs, const auto & rhs) {
            return std::tie(lhs.name, lhs.pos) < std::tie(rhs.name, rhs.pos);
        });

    // After sorting, equal names are adjacent. Keep the smallest
    // (name, position of the repeated occurrence) pair among all of them.
    std::optional<std::pair<Symbol, PosIdx>> duplicate;
    for (size_t next = 1; next < entries.size(); ++next) {
        const auto & earlier = entries[next - 1];
        const auto & later = entries[next];
        if (earlier.name != later.name)
            continue;
        std::pair candidate{earlier.name, later.pos};
        if (!duplicate || candidate < *duplicate)
            duplicate = candidate;
    }

    if (duplicate) {
        throw ParseError({
            .msg = HintFmt("duplicate formal function argument '%1%'", symbols[duplicate->first]),
            .pos = positions[duplicate->second]
        });
    }

    if (arg && formals->has(arg)) {
        throw ParseError({
            .msg = HintFmt("duplicate formal function argument '%1%'", symbols[arg]),
            .pos = positions[pos]
        });
    }

    return formals;
}
|
|
|
|
|
|
|
|
/**
 * Strip the common leading indentation from the parts of a string.
 *
 * The first pass finds the minimum number of leading spaces over all lines
 * that contain something other than spaces. The second pass rebuilds the
 * parts, dropping up to that many leading spaces from each line.
 *
 * @param pos Position given to the resulting `ExprConcatStrings`.
 * @param es Parts of the string: expressions, or string tokens.
 * @return An empty `ExprString` if `es` is empty; the single `ExprString`
 *         if the result consists of exactly one string part; otherwise an
 *         `ExprConcatStrings` over all parts.
 */
inline Expr * ParserState::stripIndentation(const PosIdx pos,
    std::vector<std::pair<PosIdx, std::variant<Expr *, StringToken>>> && es)
{
    if (es.empty()) return new ExprString("");

    /* Figure out the minimum indentation. Note that by design
       whitespace-only final lines are not taken into account. (So
       the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */
    bool atStartOfLine = true; /* = seen only whitespace in the current line */
    size_t minIndent = 1000000;
    size_t curIndent = 0;

    /* The current line turned out to have content: its leading spaces
       now count towards the minimum. */
    const auto endLeadingWhitespace = [&] {
        atStartOfLine = false;
        if (curIndent < minIndent) minIndent = curIndent;
    };

    for (const auto & [partPos, part] : es) {
        const auto * tok = std::get_if<StringToken>(&part);
        if (!tok || !tok->hasIndentation) {
            /* Anti-quotations and escaped characters end the current start-of-line whitespace. */
            if (atStartOfLine) endLeadingWhitespace();
            continue;
        }
        for (const char c : std::string_view(*tok)) {
            if (c == '\n') {
                /* Either an empty line, which doesn't influence the
                   minimum indentation, or the end of a line with content. */
                atStartOfLine = true;
                curIndent = 0;
            } else if (atStartOfLine) {
                if (c == ' ')
                    curIndent++;
                else
                    endLeadingWhitespace();
            }
        }
    }

    /* Strip spaces from each line. */
    auto * es2 = new std::vector<std::pair<PosIdx, Expr *>>;
    atStartOfLine = true;
    size_t curDropped = 0;

    for (size_t idx = 0; idx < es.size(); ++idx) {
        auto & [partPos, part] = es[idx];

        if (auto * exprPart = std::get_if<Expr *>(&part)) {
            /* An expression counts as content on the current line. */
            atStartOfLine = false;
            curDropped = 0;
            es2->emplace_back(partPos, *exprPart);
            continue;
        }

        const StringToken & tok = std::get<StringToken>(part);
        std::string stripped;
        for (const char c : std::string_view(tok)) {
            if (!atStartOfLine) {
                stripped += c;
                if (c == '\n') atStartOfLine = true;
                continue;
            }
            if (c == ' ') {
                /* Drop the first `minIndent` spaces of the line, keep the rest. */
                if (curDropped++ >= minIndent)
                    stripped += c;
                continue;
            }
            /* A newline keeps us at the start of the (next) line; anything
               else is the first real character of this line. */
            if (c != '\n') atStartOfLine = false;
            curDropped = 0;
            stripped += c;
        }

        /* Remove the last line if it is empty and consists only of
           spaces. */
        const bool isLastPart = idx + 1 == es.size();
        if (isLastPart) {
            const std::string::size_type lastNewline = stripped.find_last_of('\n');
            if (lastNewline != std::string::npos
                && stripped.find_first_not_of(' ', lastNewline + 1) == std::string::npos)
                stripped.resize(lastNewline + 1);
        }

        es2->emplace_back(partPos, new ExprString(std::move(stripped)));
    }

    /* If this is a single string, then don't do a concatenation. */
    if (es2->size() == 1 && dynamic_cast<ExprString *>((*es2)[0].second)) {
        auto * const only = (*es2)[0].second;
        delete es2;
        return only;
    }
    return new ExprConcatStrings(pos, true, es2);
}
|
|
|
|
|
2024-07-08 18:39:26 +03:00
|
|
|
/**
 * Add the begin offset of `loc` to the position table under this state's
 * origin and return the index that `positions.add` yields.
 */
inline PosIdx LexerState::at(const ParserLocation & loc)
{
    const auto offset = loc.beginOffset;
    return positions.add(origin, offset);
}
|
|
|
|
|
2024-01-15 17:52:18 +02:00
|
|
|
/**
 * Add the begin offset of `loc` to the position table under this state's
 * origin and return the index that `positions.add` yields.
 */
inline PosIdx ParserState::at(const ParserLocation & loc)
{
    const auto offset = loc.beginOffset;
    return positions.add(origin, offset);
}
|
|
|
|
|
|
|
|
}
|