Track doc comments and render them in :doc

This commit is contained in:
Robert Hensing 2024-07-08 17:39:26 +02:00
parent e5af7cbeb9
commit 7fae378835
25 changed files with 515 additions and 16 deletions

View file

@ -0,0 +1,53 @@
---
synopsis: "`nix-repl`'s `:doc` shows documentation comments"
significance: significant
issues:
- 3904
- 10771
prs:
- 1652
- 9054
- 11072
---
`nix repl` has a `:doc` command that previously only rendered documentation for internally defined functions.
This feature has been extended to also render function documentation comments, in accordance with [RFC 145].
Example:
```
nix-repl> :doc lib.toFunction
Function toFunction
… defined at /home/user/h/nixpkgs/lib/trivial.nix:1072:5
Turns any non-callable values into constant functions. Returns
callable values as is.
Inputs
v
: Any value
Examples
:::{.example}
## lib.trivial.toFunction usage example
| nix-repl> lib.toFunction 1 2
| 1
|
| nix-repl> lib.toFunction (x: x + 1) 2
| 3
:::
```
Known limitations:
- It currently only works for functions. We plan to extend this to attributes, which may contain arbitrary values.
- Some extensions to markdown are not yet supported, as you can see in the example above.
We'd like to acknowledge Yingchi Long for proposing a proof of concept for this functionality in [#9054](https://github.com/NixOS/nix/pull/9054), as well as @sternenseemann and Johannes Kirschbauer for their contributions, proposals, and their work on [RFC 145].
[RFC 145]: https://github.com/NixOS/rfcs/pull/145

View file

@ -559,6 +559,67 @@ std::optional<EvalState::Doc> EvalState::getDoc(Value & v)
.doc = doc,
};
}
if (v.isLambda()) {
auto exprLambda = v.payload.lambda.fun;
std::stringstream s(std::ios_base::out);
std::string name;
auto pos = positions[exprLambda->getPos()];
std::string docStr;
if (exprLambda->name) {
name = symbols[exprLambda->name];
}
if (exprLambda->docComment) {
auto begin = positions[exprLambda->docComment.begin];
auto end = positions[exprLambda->docComment.end];
auto docCommentStr = begin.getSnippetUpTo(end);
// Strip "/**" and "*/"
constexpr size_t prefixLen = 3;
constexpr size_t suffixLen = 2;
docStr = docCommentStr.substr(prefixLen, docCommentStr.size() - prefixLen - suffixLen);
if (docStr.empty())
return {};
// Turn the now missing "/**" into indentation
docStr = " " + docStr;
// Strip indentation (for the whole, potentially multi-line string)
docStr = stripIndentation(docStr);
}
if (name.empty()) {
s << "Function ";
}
else {
s << "Function **" << name << "**";
if (pos)
s << "\\\n" ;
else
s << "\\\n";
}
if (pos) {
s << "defined at " << pos;
}
if (!docStr.empty()) {
s << "\n\n";
}
s << docStr;
s << '\0'; // for making a c string below
std::string ss = s.str();
return Doc {
.pos = pos,
.name = name,
.arity = 0, // FIXME: figure out how deep by syntax only? It's not semantically useful though...
.args = {},
.doc =
// FIXME: this leaks; make the field std::string?
strdup(ss.data()),
};
}
return {};
}

View file

@ -5,7 +5,7 @@
%option stack
%option nodefault
%option nounput noyy_top_state
%option extra-type="::nix::LexerState *"
%s DEFAULT
%x STRING
@ -23,6 +23,12 @@
#include "nixexpr.hh"
#include "parser-tab.hh"
// !!! FIXME !!!
#define YY_EXTRA_TYPE ::nix::LexerState *
int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner);
YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner );
#undef YY_EXTRA_TYPE
using namespace nix;
namespace nix {
@ -35,10 +41,24 @@ static void initLoc(YYLTYPE * loc)
loc->first_column = loc->last_column = 0;
}
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
static void adjustLoc(yyscan_t yyscanner, YYLTYPE * loc, const char * s, size_t len)
{
loc->stash();
LexerState & lexerState = *yyget_extra(yyscanner);
if (lexerState.docCommentDistance == 1) {
// Preceding token was a doc comment.
ParserLocation doc;
doc.first_column = lexerState.lastDocCommentLoc.first_column;
ParserLocation docEnd;
docEnd.first_column = lexerState.lastDocCommentLoc.last_column;
DocComment docComment{lexerState.at(doc), lexerState.at(docEnd)};
PosIdx locPos = lexerState.at(*loc);
lexerState.positionToDocComment.emplace(locPos, docComment);
}
lexerState.docCommentDistance++;
loc->first_column = loc->last_column;
loc->last_column += len;
}
@ -79,7 +99,7 @@ static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#define YY_USER_INIT initLoc(yylloc)
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
#define YY_USER_ACTION adjustLoc(yyscanner, yylloc, yytext, yyleng);
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
#define POP_STATE() yy_pop_state(yyscanner)
@ -279,9 +299,33 @@ or { return OR_KW; }
{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
[ \t\r\n]+ /* eat up whitespace */
\#[^\r\n]* /* single-line comments */
\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */
%{
// Doc comment rule
//
// \/\*\* /**
// [^/*] reject /**/ (empty comment) and /***
// ([^*]|\*+[^*/])*\*+\/ same as the long comment rule
// ( )* zero or more non-ending sequences
// \* end(1)
// \/ end(2)
%}
\/\*\*[^/*]([^*]|\*+[^*/])*\*+\/ /* doc comments */ {
LexerState & lexerState = *yyget_extra(yyscanner);
lexerState.docCommentDistance = 0;
lexerState.lastDocCommentLoc.first_line = yylloc->first_line;
lexerState.lastDocCommentLoc.first_column = yylloc->first_column;
lexerState.lastDocCommentLoc.last_column = yylloc->last_column;
}
%{
// The following rules have docCommentDistance--
// This compensates for the docCommentDistance++ which happens by default to
// make all the other rules invalidate the doc comment.
%}
[ \t\r\n]+ /* eat up whitespace */ { yyget_extra(yyscanner)->docCommentDistance--; }
\#[^\r\n]* /* single-line comments */ { yyget_extra(yyscanner)->docCommentDistance--; }
\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */ { yyget_extra(yyscanner)->docCommentDistance--; }
{ANY} {
/* Don't return a negative number, as this will cause

View file

@ -583,6 +583,20 @@ std::string ExprLambda::showNamePos(const EvalState & state) const
return fmt("%1% at %2%", id, state.positions[pos]);
}
void ExprLambda::setDocComment(DocComment docComment) {
if (!this->docComment) {
this->docComment = docComment;
// Curried functions are defined by putting a function directly
// in the body of another function. To render docs for those, we
// need to propagate the doc comment to the innermost function.
//
// If we have our own comment, we've already propagated it, so this
// belongs in the same conditional.
body->setDocComment(docComment);
}
};
/* Position table. */

View file

@ -11,13 +11,56 @@
namespace nix {
struct Env;
struct Value;
class EvalState;
class PosTable;
struct Env;
struct ExprWith;
struct StaticEnv;
struct Value;
/**
* A documentation comment, in the sense of [RFC 145](https://github.com/NixOS/rfcs/blob/master/rfcs/0145-doc-strings.md)
*
* Note that this does not implement the following:
* - argument attribute names ("formals"): TBD
* - argument names: these are internal to the function and their names may not be optimal for documentation
* - function arity (degree of currying or number of ':'s):
* - Functions returning partially applied functions have a higher arity
* than can be determined locally and without evaluation.
* We do not want to present false data.
* - Some functions should be thought of as transformations of other
* functions. For instance `overlay -> overlay -> overlay` is the simplest
* way to understand `composeExtensions`, but its implementation looks like
* `f: g: final: prev: <...>`. The parameters `final` and `prev` are part
* of the overlay concept, while distracting from the function's purpose.
*/
struct DocComment {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcomment" // "nested comment start" is intentional
/**
* Start of the comment, including `/**`.
*/
PosIdx begin;
#pragma GCC diagnostic pop
/**
* Position right after the final asterisk and `/` that terminate the comment.
*/
PosIdx end;
/**
* Whether the comment is set.
*
* A `DocComment` is small enough that it makes sense to pass by value, and
* therefore baking optionality into it is also useful, to avoiding the memory
* overhead of `std::optional`.
*/
operator bool() const { return static_cast<bool>(begin); }
};
/**
* An attribute path is a sequence of attribute names.
@ -54,6 +97,7 @@ struct Expr
virtual void eval(EvalState & state, Env & env, Value & v);
virtual Value * maybeThunk(EvalState & state, Env & env);
virtual void setName(Symbol name);
virtual void setDocComment(DocComment docComment) { };
virtual PosIdx getPos() const { return noPos; }
};
@ -278,6 +322,8 @@ struct ExprLambda : Expr
Symbol arg;
Formals * formals;
Expr * body;
DocComment docComment;
ExprLambda(PosIdx pos, Symbol arg, Formals * formals, Expr * body)
: pos(pos), arg(arg), formals(formals), body(body)
{
@ -290,6 +336,7 @@ struct ExprLambda : Expr
std::string showNamePos(const EvalState & state) const;
inline bool hasFormals() const { return formals != nullptr; }
PosIdx getPos() const override { return pos; }
virtual void setDocComment(DocComment docComment) override;
COMMON_METHODS
};

View file

@ -1,6 +1,8 @@
#pragma once
///@file
#include <limits>
#include "eval.hh"
namespace nix {
@ -35,10 +37,44 @@ struct ParserLocation
first_column = stashed_first_column;
last_column = stashed_last_column;
}
/** Latest doc comment position, or 0. */
int doc_comment_first_line, doc_comment_first_column, doc_comment_last_column;
};
struct LexerState
{
/**
* Tracks the distance to the last doc comment, in terms of lexer tokens.
*
* The lexer sets this to 0 when reading a doc comment, and increments it
* for every matched rule; see `lexer-helpers.cc`.
* Whitespace and comment rules decrement the distance, so that they result
* in a net 0 change in distance.
*/
int docCommentDistance = std::numeric_limits<int>::max();
/**
* The location of the last doc comment.
*
* (stashing fields are not used)
*/
ParserLocation lastDocCommentLoc;
/**
* @brief Maps some positions to a DocComment, where the comment is relevant to the location.
*/
std::map<PosIdx, DocComment> positionToDocComment;
PosTable & positions;
PosTable::Origin origin;
PosIdx at(const ParserLocation & loc);
};
struct ParserState
{
const LexerState & lexerState;
SymbolTable & symbols;
PosTable & positions;
Expr * result;
@ -270,6 +306,11 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos,
return new ExprConcatStrings(pos, true, es2);
}
inline PosIdx LexerState::at(const ParserLocation & loc)
{
return positions.add(origin, loc.first_column);
}
inline PosIdx ParserState::at(const ParserLocation & loc)
{
return positions.add(origin, loc.first_column);

View file

@ -74,6 +74,14 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char *
});
}
#define SET_DOC_POS(lambda, pos) setDocPosition(state->lexerState, lambda, state->at(pos))
static void setDocPosition(const LexerState & lexerState, ExprLambda * lambda, PosIdx start) {
auto it = lexerState.positionToDocComment.find(start);
if (it != lexerState.positionToDocComment.end()) {
lambda->setDocComment(it->second);
}
}
%}
@ -119,6 +127,7 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char *
%token IND_STRING_OPEN IND_STRING_CLOSE
%token ELLIPSIS
%right IMPL
%left OR
%left AND
@ -140,18 +149,28 @@ expr: expr_function;
expr_function
: ID ':' expr_function
{ $$ = new ExprLambda(CUR_POS, state->symbols.create($1), 0, $3); }
{ auto me = new ExprLambda(CUR_POS, state->symbols.create($1), 0, $3);
$$ = me;
SET_DOC_POS(me, @1);
}
| '{' formals '}' ':' expr_function
{ $$ = new ExprLambda(CUR_POS, state->validateFormals($2), $5); }
{ auto me = new ExprLambda(CUR_POS, state->validateFormals($2), $5);
$$ = me;
SET_DOC_POS(me, @1);
}
| '{' formals '}' '@' ID ':' expr_function
{
auto arg = state->symbols.create($5);
$$ = new ExprLambda(CUR_POS, arg, state->validateFormals($2, CUR_POS, arg), $7);
auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($2, CUR_POS, arg), $7);
$$ = me;
SET_DOC_POS(me, @1);
}
| ID '@' '{' formals '}' ':' expr_function
{
auto arg = state->symbols.create($1);
$$ = new ExprLambda(CUR_POS, arg, state->validateFormals($4, CUR_POS, arg), $7);
auto me = new ExprLambda(CUR_POS, arg, state->validateFormals($4, CUR_POS, arg), $7);
$$ = me;
SET_DOC_POS(me, @1);
}
| ASSERT expr ';' expr_function
{ $$ = new ExprAssert(CUR_POS, $2, $4); }
@ -312,7 +331,20 @@ ind_string_parts
;
binds
: binds attrpath '=' expr ';' { $$ = $1; state->addAttr($$, std::move(*$2), $4, state->at(@2)); delete $2; }
: binds attrpath '=' expr ';' {
$$ = $1;
auto pos = state->at(@2);
{
auto it = state->lexerState.positionToDocComment.find(pos);
if (it != state->lexerState.positionToDocComment.end()) {
$4->setDocComment(it->second);
}
}
state->addAttr($$, std::move(*$2), $4, pos);
delete $2;
}
| binds INHERIT attrs ';'
{ $$ = $1;
for (auto & [i, iPos] : *$3) {
@ -435,17 +467,22 @@ Expr * parseExprFromBuf(
const Expr::AstSymbols & astSymbols)
{
yyscan_t scanner;
LexerState lexerState {
.positions = positions,
.origin = positions.addOrigin(origin, length),
};
ParserState state {
.lexerState = lexerState,
.symbols = symbols,
.positions = positions,
.basePath = basePath,
.origin = positions.addOrigin(origin, length),
.origin = lexerState.origin,
.rootFS = rootFS,
.s = astSymbols,
.settings = settings,
};
yylex_init(&scanner);
yylex_init_extra(&lexerState, &scanner);
Finally _destroy([&] { yylex_destroy(scanner); });
yy_scan_buffer(text, length, scanner);

View file

@ -1,6 +1,7 @@
#!/usr/bin/env bash
source common.sh
source characterisation/framework.sh
testDir="$PWD"
cd "$TEST_ROOT"
@ -244,3 +245,32 @@ testReplResponseNoRegex '
y = { a = 1 };
}
'
# TODO: move init to characterisation/framework.sh
badDiff=0
badExitCode=0
nixVersion="$(nix eval --impure --raw --expr 'builtins.nixVersion' --extra-experimental-features nix-command)"
runRepl () {
# TODO: pass arguments to nix repl; see lang.sh
nix repl 2>&1 \
| stripColors \
| sed \
-e "s@$testDir@/path/to/tests/functional@g" \
-e "s@$nixVersion@<nix version>@g" \
-e "s@Added [0-9]* variables@Added <number omitted> variables@g" \
| grep -vF $'warning: you don\'t have Internet access; disabling some network-dependent features' \
;
}
for test in $(cd "$testDir/repl"; echo *.in); do
test="$(basename "$test" .in)"
in="$testDir/repl/$test.in"
actual="$testDir/repl/$test.actual"
expected="$testDir/repl/$test.expected"
(cd "$testDir/repl"; set +x; runRepl 2>&1) < "$in" > "$actual"
diffAndAcceptInner "$test" "$actual" "$expected"
done
characterisationTestExit

View file

@ -0,0 +1,8 @@
Nix <nix version>
Type :? for help.
Function defined at
/path/to/tests/functional/repl/doc-comment-function.nix:2:1
A doc comment for a file that only contains a function

View file

@ -0,0 +1 @@
:doc import ./doc-comment-function.nix

View file

@ -0,0 +1,3 @@
/** A doc comment for a file that only contains a function */
{ ... }:
{ }

View file

@ -0,0 +1,57 @@
{
/**
Perform *arithmetic* multiplication. It's kind of like repeated **addition**, very neat.
```nix
multiply 2 3
=> 6
```
*/
multiply = x: y: x * y;
/**👈 precisely this wide 👉*/
measurement = x: x;
floatedIn = /** This also works. */
x: y: x;
compact=/**boom*/x: x;
/** Ignore!!! */
unambiguous =
/** Very close */
x: x;
/** Firmly rigid. */
constant = true;
/** Immovably fixed. */
lib.version = "9000";
/** Unchangeably constant. */
lib.attr.empty = { };
lib.attr.undocumented = { };
nonStrict = /** My syntax is not strict, but I'm strict anyway. */ x: x;
strict = /** I don't have to be strict, but I am anyway. */ { ... }: null;
# Note that pre and post are the same here. I just had to name them somehow.
strictPre = /** Here's one way to do this */ a@{ ... }: a;
strictPost = /** Here's another way to do this */ { ... }@a: a;
# TODO
# /** This returns a documented function. */
# documentedArgs =
# /** x */
# x:
# /** y */
# y:
# /** x + y */
# x + y;
# /** Documented formals */
# documentedFormals =
# /** x */
# x: x;
}

View file

@ -0,0 +1,11 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function compact
… defined at
/path/to/tests/functional/repl/doc-comments.nix:18:20
boom

View file

@ -0,0 +1,2 @@
:l doc-comments.nix
:doc compact

View file

@ -0,0 +1,11 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function floatedIn
… defined at
/path/to/tests/functional/repl/doc-comments.nix:16:5
This also works.

View file

@ -0,0 +1,2 @@
:l doc-comments.nix
:doc floatedIn

View file

@ -0,0 +1,29 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function nonStrict
… defined at
/path/to/tests/functional/repl/doc-comments.nix:36:70
My syntax is not strict, but I'm strict anyway.
Function strict
… defined at
/path/to/tests/functional/repl/doc-comments.nix:37:63
I don't have to be strict, but I am anyway.
Function strictPre
… defined at
/path/to/tests/functional/repl/doc-comments.nix:39:48
Here's one way to do this
Function strictPost
… defined at
/path/to/tests/functional/repl/doc-comments.nix:40:53
Here's another way to do this

View file

@ -0,0 +1,5 @@
:l doc-comments.nix
:doc nonStrict
:doc strict
:doc strictPre
:doc strictPost

View file

@ -0,0 +1,11 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function measurement
… defined at
/path/to/tests/functional/repl/doc-comments.nix:13:17
👈 precisely this wide 👉

View file

@ -0,0 +1,2 @@
:l doc-comments.nix
:doc measurement

View file

@ -0,0 +1,15 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function multiply
… defined at
/path/to/tests/functional/repl/doc-comments.nix:10:14
Perform arithmetic multiplication. It's kind of like
repeated addition, very neat.
| multiply 2 3
| => 6

View file

@ -0,0 +1,2 @@
:l doc-comments.nix
:doc multiply

View file

@ -0,0 +1,11 @@
Nix <nix version>
Type :? for help.
Added <number omitted> variables.
Function unambiguous
… defined at
/path/to/tests/functional/repl/doc-comments.nix:23:5
Very close

View file

@ -0,0 +1,2 @@
:l doc-comments.nix
:doc unambiguous