From 7434caca0545bd6194bb52eebf6fdf0424755eb0 Mon Sep 17 00:00:00 2001
From: Rebecca Turner <rbt@sent.as>
Date: Fri, 15 Dec 2023 11:52:21 -0800
Subject: [PATCH] Fix segfault on infinite recursion in some cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a segfault on infinite function call recursion (rather than
infinite thunk recursion) by tracking the function call depth in
`EvalState`.

Additionally, to avoid printing extremely long stack traces, stack
frames are now deduplicated, with a `(19997 duplicate traces omitted)`
message. This should only really be triggered in infinite recursion
scenarios.

Before:

    $ nix-instantiate --eval --expr '(x: x x) (x: x x)'
    Segmentation fault: 11

After:

    $ nix-instantiate --eval --expr '(x: x x) (x: x x)'
    error: stack overflow

           at «string»:1:14:
                1| (x: x x) (x: x x)
                 |              ^

    $ nix-instantiate --eval --expr '(x: x x) (x: x x)' --show-trace
    error:
           … from call site
             at «string»:1:1:
                1| (x: x x) (x: x x)
                 | ^

           … while calling anonymous lambda
             at «string»:1:2:
                1| (x: x x) (x: x x)
                 |  ^

           … from call site
             at «string»:1:5:
                1| (x: x x) (x: x x)
                 |     ^

           … while calling anonymous lambda
             at «string»:1:11:
                1| (x: x x) (x: x x)
                 |           ^

           … from call site
             at «string»:1:14:
                1| (x: x x) (x: x x)
                 |              ^

           (19997 duplicate traces omitted)

           error: stack overflow
           at «string»:1:14:
                1| (x: x x) (x: x x)
                 |              ^
---
 .../rl-next/stack-overflow-segfaults.md       |  32 +++++
 src/libexpr/eval-settings.hh                  |   3 +
 src/libexpr/eval.cc                           |  18 +++
 src/libexpr/eval.hh                           |   5 +
 src/libutil/error.cc                          | 111 +++++++++++++++++-
 src/libutil/error.hh                          |   8 ++
 .../lang/eval-fail-duplicate-traces.err.exp   |  44 +++++++
 .../lang/eval-fail-duplicate-traces.nix       |   9 ++
 ...val-fail-infinite-recursion-lambda.err.exp |  38 ++++++
 .../eval-fail-infinite-recursion-lambda.nix   |   1 +
 .../lang/eval-fail-mutual-recursion.err.exp   |  57 +++++++++
 .../lang/eval-fail-mutual-recursion.nix       |  36 ++++++
 12 files changed, 358 insertions(+), 4 deletions(-)
 create mode 100644 doc/manual/rl-next/stack-overflow-segfaults.md
 create mode 100644 tests/functional/lang/eval-fail-duplicate-traces.err.exp
 create mode 100644 tests/functional/lang/eval-fail-duplicate-traces.nix
 create mode 100644 tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp
 create mode 100644 tests/functional/lang/eval-fail-infinite-recursion-lambda.nix
 create mode 100644 tests/functional/lang/eval-fail-mutual-recursion.err.exp
 create mode 100644 tests/functional/lang/eval-fail-mutual-recursion.nix

diff --git a/doc/manual/rl-next/stack-overflow-segfaults.md b/doc/manual/rl-next/stack-overflow-segfaults.md
new file mode 100644
index 000000000..3d9753248
--- /dev/null
+++ b/doc/manual/rl-next/stack-overflow-segfaults.md
@@ -0,0 +1,32 @@
+---
+synopsis: Some stack overflow segfaults are fixed
+issues: 9616
+prs: 9617
+---
+
+The number of nested function calls has been restricted, to detect and report
+infinite function call recursions. The default maximum call depth is 10,000 and
+can be set with [the `max-call-depth`
+option](@docroot@/command-ref/conf-file.md#conf-max-call-depth).
+
+This fixes segfaults or the following unhelpful error message in many cases:
+
+    error: stack overflow (possible infinite recursion)
+
+Before:
+
+```
+$ nix-instantiate --eval --expr '(x: x x) (x: x x)'
+Segmentation fault: 11
+```
+
+After:
+
+```
+$ nix-instantiate --eval --expr '(x: x x) (x: x x)'
+error: stack overflow
+
+       at «string»:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+```
diff --git a/src/libexpr/eval-settings.hh b/src/libexpr/eval-settings.hh
index ad187ca01..2f6c12d45 100644
--- a/src/libexpr/eval-settings.hh
+++ b/src/libexpr/eval-settings.hh
@@ -124,6 +124,9 @@ struct EvalSettings : Config
 
     Setting<bool> traceVerbose{this, false, "trace-verbose",
         "Whether `builtins.traceVerbose` should trace its first argument when evaluated."};
+
+    Setting<unsigned int> maxCallDepth{this, 10000, "max-call-depth",
+        "The maximum function call depth to allow before erroring."};
 };
 
 extern EvalSettings evalSettings;
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 810843995..f73e22ba0 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -1505,9 +1505,27 @@ void ExprLambda::eval(EvalState & state, Env & env, Value & v)
     v.mkLambda(&env, this);
 }
 
+namespace {
+/** Increments a count on construction and decrements on destruction.
+ */
+class CallDepth {
+  size_t & count;
+public:
+  CallDepth(size_t & count) : count(count) {
+    ++count;
+  }
+  ~CallDepth() {
+    --count;
+  }
+};
+};
 
 void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & vRes, const PosIdx pos)
 {
+    if (callDepth > evalSettings.maxCallDepth)
+        error("stack overflow; max-call-depth exceeded").atPos(pos).template debugThrow<EvalError>();
+    CallDepth _level(callDepth);
+
     auto trace = evalSettings.traceFunctionCalls
         ? std::make_unique<FunctionCallTrace>(positions[pos])
         : nullptr;
diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh
index da2d256db..7dbffe38c 100644
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@@ -622,6 +622,11 @@ private:
         const SourcePath & basePath,
         std::shared_ptr<StaticEnv> & staticEnv);
 
+    /**
+     * Current Nix call stack depth, used with `max-call-depth` setting to throw stack overflow hopefully before we run out of system stack.
+     */
+    size_t callDepth = 0;
+
 public:
 
     /**
diff --git a/src/libutil/error.cc b/src/libutil/error.cc
index bc0194d59..e42925c2b 100644
--- a/src/libutil/error.cc
+++ b/src/libutil/error.cc
@@ -50,6 +50,32 @@ std::ostream & operator <<(std::ostream & str, const AbstractPos & pos)
     return str;
 }
 
+/**
+ * An arbitrarily defined value comparison for the purpose of using traces in the key of a sorted container.
+ */
+inline bool operator<(const Trace& lhs, const Trace& rhs)
+{
+    // `std::shared_ptr` does not have value semantics for its comparison
+    // functions, so we need to check for nulls and compare the dereferenced
+    // values here.
+    if (lhs.pos != rhs.pos) {
+        if (!lhs.pos)
+            return true;
+        if (!rhs.pos)
+            return false;
+        if (*lhs.pos != *rhs.pos)
+            return *lhs.pos < *rhs.pos;
+    }
+    // This formats a freshly formatted hint string and then throws it away, which
+    // shouldn't be much of a problem because it only runs when pos is equal, and this function is
+    // used for trace printing, which is infrequent.
+    return std::forward_as_tuple(lhs.hint.str(), lhs.frame)
+        < std::forward_as_tuple(rhs.hint.str(), rhs.frame);
+}
+inline bool operator> (const Trace& lhs, const Trace& rhs) { return rhs < lhs; }
+inline bool operator<=(const Trace& lhs, const Trace& rhs) { return !(lhs > rhs); }
+inline bool operator>=(const Trace& lhs, const Trace& rhs) { return !(lhs < rhs); }
+
 std::optional<LinesOfCode> AbstractPos::getCodeLines() const
 {
     if (line == 0)
@@ -185,6 +211,69 @@ static bool printPosMaybe(std::ostream & oss, std::string_view indent, const std
     return hasPos;
 }
 
+void printTrace(
+    std::ostream & output,
+    const std::string_view & indent,
+    size_t & count,
+    const Trace & trace)
+{
+    output << "\n" << "… " << trace.hint.str() << "\n";
+
+    if (printPosMaybe(output, indent, trace.pos))
+        count++;
+}
+
+void printSkippedTracesMaybe(
+    std::ostream & output,
+    const std::string_view & indent,
+    size_t & count,
+    std::vector<Trace> & skippedTraces,
+    std::set<Trace> tracesSeen)
+{
+    if (skippedTraces.size() > 0) {
+        // If we only skipped a few frames, print them out normally;
+        // messages like "1 duplicate frames omitted" aren't helpful.
+        if (skippedTraces.size() <= 5) {
+            for (auto & trace : skippedTraces) {
+                printTrace(output, indent, count, trace);
+            }
+        } else {
+            output << "\n" << ANSI_WARNING "(" << skippedTraces.size() << " duplicate frames omitted)" ANSI_NORMAL << "\n";
+            // Clear the set of "seen" traces after printing a chunk of
+            // `duplicate frames omitted`.
+            //
+            // Consider a mutually recursive stack trace with:
+            // - 10 entries of A
+            // - 10 entries of B
+            // - 10 entries of A
+            //
+            // If we don't clear `tracesSeen` here, we would print output like this:
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            // - 1 entry of B
+            // - (19 duplicate frames omitted)
+            //
+            // This would obscure the control flow, which went from A,
+            // to B, and back to A again.
+            //
+            // In contrast, if we do clear `tracesSeen`, the output looks like this:
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            // - 1 entry of B
+            // - (9 duplicate frames omitted)
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            //
+            // See: `tests/functional/lang/eval-fail-mutual-recursion.nix`
+            tracesSeen.clear();
+        }
+    }
+    // We've either printed each trace in `skippedTraces` normally, or
+    // printed a chunk of `duplicate frames omitted`. Either way, we've
+    // processed these traces and can clear them.
+    skippedTraces.clear();
+}
+
 std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool showTrace)
 {
     std::string prefix;
@@ -333,7 +422,13 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s
 
     bool frameOnly = false;
     if (!einfo.traces.empty()) {
+        // Stack traces seen since we last printed a chunk of `duplicate frames
+        // omitted`.
+        std::set<Trace> tracesSeen;
+        // A consecutive sequence of stack traces that are all in `tracesSeen`.
+        std::vector<Trace> skippedTraces;
         size_t count = 0;
+
         for (const auto & trace : einfo.traces) {
             if (trace.hint.str().empty()) continue;
             if (frameOnly && !trace.frame) continue;
@@ -343,14 +438,21 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s
                 break;
             }
 
+            if (tracesSeen.count(trace)) {
+                skippedTraces.push_back(trace);
+                continue;
+            }
+            tracesSeen.insert(trace);
+
+            printSkippedTracesMaybe(oss, ellipsisIndent, count, skippedTraces, tracesSeen);
+
             count++;
             frameOnly = trace.frame;
 
-            oss << "\n" << "… " << trace.hint.str() << "\n";
-
-            if (printPosMaybe(oss, ellipsisIndent, trace.pos))
-                count++;
+            printTrace(oss, ellipsisIndent, count, trace);
         }
+
+        printSkippedTracesMaybe(oss, ellipsisIndent, count, skippedTraces, tracesSeen);
         oss << "\n" << prefix;
     }
 
@@ -369,4 +471,5 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s
 
     return out;
 }
+
 }
diff --git a/src/libutil/error.hh b/src/libutil/error.hh
index c04dcbd77..baffca128 100644
--- a/src/libutil/error.hh
+++ b/src/libutil/error.hh
@@ -25,6 +25,7 @@
 #include <memory>
 #include <map>
 #include <optional>
+#include <compare>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -88,6 +89,8 @@ struct AbstractPos
     std::optional<LinesOfCode> getCodeLines() const;
 
     virtual ~AbstractPos() = default;
+
+    inline auto operator<=>(const AbstractPos& rhs) const = default;
 };
 
 std::ostream & operator << (std::ostream & str, const AbstractPos & pos);
@@ -103,6 +106,11 @@ struct Trace {
     bool frame;
 };
 
+inline bool operator<(const Trace& lhs, const Trace& rhs);
+inline bool operator> (const Trace& lhs, const Trace& rhs);
+inline bool operator<=(const Trace& lhs, const Trace& rhs);
+inline bool operator>=(const Trace& lhs, const Trace& rhs);
+
 struct ErrorInfo {
     Verbosity level;
     hintformat msg;
diff --git a/tests/functional/lang/eval-fail-duplicate-traces.err.exp b/tests/functional/lang/eval-fail-duplicate-traces.err.exp
new file mode 100644
index 000000000..32ad9b376
--- /dev/null
+++ b/tests/functional/lang/eval-fail-duplicate-traces.err.exp
@@ -0,0 +1,44 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:9:3:
+            8| in
+            9|   throwAfter 2
+             |   ^
+           10|
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:6:10:
+            5|     if n > 0
+            6|     then throwAfter (n - 1)
+             |          ^
+            7|     else throw "Uh oh!";
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:6:10:
+            5|     if n > 0
+            6|     then throwAfter (n - 1)
+             |          ^
+            7|     else throw "Uh oh!";
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       error: Uh oh!
diff --git a/tests/functional/lang/eval-fail-duplicate-traces.nix b/tests/functional/lang/eval-fail-duplicate-traces.nix
new file mode 100644
index 000000000..17ce374ec
--- /dev/null
+++ b/tests/functional/lang/eval-fail-duplicate-traces.nix
@@ -0,0 +1,9 @@
+# Check that we only omit duplicate stack traces when there's a bunch of them.
+# Here, there's only a couple duplicate entries, so we output them all.
+let
+  throwAfter = n:
+    if n > 0
+    then throwAfter (n - 1)
+    else throw "Uh oh!";
+in
+  throwAfter 2
diff --git a/tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp b/tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp
new file mode 100644
index 000000000..5d843d827
--- /dev/null
+++ b/tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp
@@ -0,0 +1,38 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:1:
+            1| (x: x x) (x: x x)
+             | ^
+            2|
+
+       … while calling anonymous lambda
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:2:
+            1| (x: x x) (x: x x)
+             |  ^
+            2|
+
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:5:
+            1| (x: x x) (x: x x)
+             |     ^
+            2|
+
+       … while calling anonymous lambda
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:11:
+            1| (x: x x) (x: x x)
+             |           ^
+            2|
+
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+            2|
+
+       (19997 duplicate frames omitted)
+
+       error: stack overflow; max-call-depth exceeded
+       at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+            2|
diff --git a/tests/functional/lang/eval-fail-infinite-recursion-lambda.nix b/tests/functional/lang/eval-fail-infinite-recursion-lambda.nix
new file mode 100644
index 000000000..dd0a8bf2e
--- /dev/null
+++ b/tests/functional/lang/eval-fail-infinite-recursion-lambda.nix
@@ -0,0 +1 @@
+(x: x x) (x: x x)
diff --git a/tests/functional/lang/eval-fail-mutual-recursion.err.exp b/tests/functional/lang/eval-fail-mutual-recursion.err.exp
new file mode 100644
index 000000000..dc2e11766
--- /dev/null
+++ b/tests/functional/lang/eval-fail-mutual-recursion.err.exp
@@ -0,0 +1,57 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:36:3:
+           35| in
+           36|   throwAfterA true 10
+             |   ^
+           37|
+
+       … while calling 'throwAfterA'
+         at /pwd/lang/eval-fail-mutual-recursion.nix:29:26:
+           28|
+           29|   throwAfterA = recurse: n:
+             |                          ^
+           30|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:31:10:
+           30|     if n > 0
+           31|     then throwAfterA recurse (n - 1)
+             |          ^
+           32|     else if recurse
+
+       (19 duplicate frames omitted)
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:33:10:
+           32|     else if recurse
+           33|     then throwAfterB true 10
+             |          ^
+           34|     else throw "Uh oh!";
+
+       … while calling 'throwAfterB'
+         at /pwd/lang/eval-fail-mutual-recursion.nix:22:26:
+           21| let
+           22|   throwAfterB = recurse: n:
+             |                          ^
+           23|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:24:10:
+           23|     if n > 0
+           24|     then throwAfterB recurse (n - 1)
+             |          ^
+           25|     else if recurse
+
+       (19 duplicate frames omitted)
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:26:10:
+           25|     else if recurse
+           26|     then throwAfterA false 10
+             |          ^
+           27|     else throw "Uh oh!";
+
+       (21 duplicate frames omitted)
+
+       error: Uh oh!
diff --git a/tests/functional/lang/eval-fail-mutual-recursion.nix b/tests/functional/lang/eval-fail-mutual-recursion.nix
new file mode 100644
index 000000000..d090d3158
--- /dev/null
+++ b/tests/functional/lang/eval-fail-mutual-recursion.nix
@@ -0,0 +1,36 @@
+# Check that stack frame deduplication only affects consecutive intervals, and
+# that they are reported independently of any preceding sections, even if
+# they're indistinguishable.
+#
+# In terms of the current implementation, we check that we clear the set of
+# "seen frames" after eliding a group of frames.
+#
+# Suppose we have:
+# - 10 frames in a function A
+# - 10 frames in a function B
+# - 10 frames in a function A
+#
+# We want to output:
+# - a few frames of A (skip the rest)
+# - a few frames of B (skip the rest)
+# - a few frames of A (skip the rest)
+#
+# If we implemented this in the naive manner, we'd instead get:
+# - a few frames of A (skip the rest)
+# - a few frames of B (skip the rest, _and_ skip the remaining frames of A)
+let
+  throwAfterB = recurse: n:
+    if n > 0
+    then throwAfterB recurse (n - 1)
+    else if recurse
+    then throwAfterA false 10
+    else throw "Uh oh!";
+
+  throwAfterA = recurse: n:
+    if n > 0
+    then throwAfterA recurse (n - 1)
+    else if recurse
+    then throwAfterB true 10
+    else throw "Uh oh!";
+in
+  throwAfterA true 10