Fix segfault on infinite recursion in some cases

This fixes a segfault on infinite function call recursion (rather than infinite thunk recursion) by tracking the function call depth in `EvalState`. Additionally, to avoid printing extremely long stack traces, stack frames are now deduplicated, with a `(19997 duplicate traces omitted)` message. This should only really be triggered in infinite recursion scenarios. Before: $ nix-instantiate --eval --expr '(x: x x) (x: x x)' Segmentation fault: 11 After: $ nix-instantiate --eval --expr '(x: x x) (x: x x)' error: stack overflow at «string»:1:14: 1| (x: x x) (x: x x) | ^ $ nix-instantiate --eval --expr '(x: x x) (x: x x)' --show-trace error: … from call site at «string»:1:1: 1| (x: x x) (x: x x) | ^ … while calling anonymous lambda at «string»:1:2: 1| (x: x x) (x: x x) | ^ … from call site at «string»:1:5: 1| (x: x x) (x: x x) | ^ … while calling anonymous lambda at «string»:1:11: 1| (x: x x) (x: x x) | ^ … from call site at «string»:1:14: 1| (x: x x) (x: x x) | ^ (19997 duplicate traces omitted) error: stack overflow at «string»:1:14: 1| (x: x x) (x: x x) | ^
2023-12-15 11:52:21 -08:00 · 2023-12-15 11:52:21 -08:00 · 7434caca05
parent a21c762dab
commit 7434caca05
12 changed files with 358 additions and 4 deletions
--- a/doc/manual/rl-next/stack-overflow-segfaults.md
+++ b/doc/manual/rl-next/stack-overflow-segfaults.md
@ -0,0 +1,32 @@
+---
+synopsis: Some stack overflow segfaults are fixed
+issues: 9616
+prs: 9617
+---
+
+The number of nested function calls has been restricted, to detect and report
+infinite function call recursions. The default maximum call depth is 10,000 and
+can be set with [the `max-call-depth`
+option](@docroot@/command-ref/conf-file.md#conf-max-call-depth).
+
+This fixes segfaults or the following unhelpful error message in many cases:
+
+    error: stack overflow (possible infinite recursion)
+
+Before:
+
+```
+$ nix-instantiate --eval --expr '(x: x x) (x: x x)'
+Segmentation fault: 11
+```
+
+After:
+
+```
+$ nix-instantiate --eval --expr '(x: x x) (x: x x)'
+error: stack overflow
+
+       at «string»:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+```
--- a/src/libexpr/eval-settings.hh
+++ b/src/libexpr/eval-settings.hh
@ -124,6 +124,9 @@ struct EvalSettings : Config

    Setting<bool> traceVerbose{this, false, "trace-verbose",
        "Whether `builtins.traceVerbose` should trace its first argument when evaluated."};
+
+    Setting<unsigned int> maxCallDepth{this, 10000, "max-call-depth",
+        "The maximum function call depth to allow before erroring."};
 };

 extern EvalSettings evalSettings;
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@ -1505,9 +1505,27 @@ void ExprLambda::eval(EvalState & state, Env & env, Value & v)
    v.mkLambda(&env, this);
 }

+namespace {
+/** Increments a count on construction and decrements on destruction.
+ */
+class CallDepth {
+  size_t & count;
+public:
+  CallDepth(size_t & count) : count(count) {
+    ++count;
+  }
+  ~CallDepth() {
+    --count;
+  }
+};
+};

 void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & vRes, const PosIdx pos)
 {
+    if (callDepth > evalSettings.maxCallDepth)
+        error("stack overflow; max-call-depth exceeded").atPos(pos).template debugThrow<EvalError>();
+    CallDepth _level(callDepth);
+
    auto trace = evalSettings.traceFunctionCalls
        ? std::make_unique<FunctionCallTrace>(positions[pos])
        : nullptr;
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@ -622,6 +622,11 @@ private:
        const SourcePath & basePath,
        std::shared_ptr<StaticEnv> & staticEnv);

+    /**
+     * Current Nix call stack depth, used with `max-call-depth` setting to throw stack overflow hopefully before we run out of system stack.
+     */
+    size_t callDepth = 0;
+
 public:

    /**
--- a/src/libutil/error.cc
+++ b/src/libutil/error.cc
@ -50,6 +50,32 @@ std::ostream & operator <<(std::ostream & str, const AbstractPos & pos)
    return str;
 }

+/**
+ * An arbitrarily defined value comparison for the purpose of using traces in the key of a sorted container.
+ */
+inline bool operator<(const Trace& lhs, const Trace& rhs)
+{
+    // `std::shared_ptr` does not have value semantics for its comparison
+    // functions, so we need to check for nulls and compare the dereferenced
+    // values here.
+    if (lhs.pos != rhs.pos) {
+        if (!lhs.pos)
+            return true;
+        if (!rhs.pos)
+            return false;
+        if (*lhs.pos != *rhs.pos)
+            return *lhs.pos < *rhs.pos;
+    }
+    // This formats a freshly formatted hint string and then throws it away, which
+    // shouldn't be much of a problem because it only runs when pos is equal, and this function is
+    // used for trace printing, which is infrequent.
+    return std::forward_as_tuple(lhs.hint.str(), lhs.frame)
+        < std::forward_as_tuple(rhs.hint.str(), rhs.frame);
+}
+inline bool operator> (const Trace& lhs, const Trace& rhs) { return rhs < lhs; }
+inline bool operator<=(const Trace& lhs, const Trace& rhs) { return !(lhs > rhs); }
+inline bool operator>=(const Trace& lhs, const Trace& rhs) { return !(lhs < rhs); }
+
 std::optional<LinesOfCode> AbstractPos::getCodeLines() const
 {
    if (line == 0)
@ -185,6 +211,69 @@ static bool printPosMaybe(std::ostream & oss, std::string_view indent, const std
    return hasPos;
 }

+void printTrace(
+    std::ostream & output,
+    const std::string_view & indent,
+    size_t & count,
+    const Trace & trace)
+{
+    output << "\n" << "… " << trace.hint.str() << "\n";
+
+    if (printPosMaybe(output, indent, trace.pos))
+        count++;
+}
+
+void printSkippedTracesMaybe(
+    std::ostream & output,
+    const std::string_view & indent,
+    size_t & count,
+    std::vector<Trace> & skippedTraces,
+    std::set<Trace> tracesSeen)
+{
+    if (skippedTraces.size() > 0) {
+        // If we only skipped a few frames, print them out normally;
+        // messages like "1 duplicate frames omitted" aren't helpful.
+        if (skippedTraces.size() <= 5) {
+            for (auto & trace : skippedTraces) {
+                printTrace(output, indent, count, trace);
+            }
+        } else {
+            output << "\n" << ANSI_WARNING "(" << skippedTraces.size() << " duplicate frames omitted)" ANSI_NORMAL << "\n";
+            // Clear the set of "seen" traces after printing a chunk of
+            // `duplicate frames omitted`.
+            //
+            // Consider a mutually recursive stack trace with:
+            // - 10 entries of A
+            // - 10 entries of B
+            // - 10 entries of A
+            //
+            // If we don't clear `tracesSeen` here, we would print output like this:
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            // - 1 entry of B
+            // - (19 duplicate frames omitted)
+            //
+            // This would obscure the control flow, which went from A,
+            // to B, and back to A again.
+            //
+            // In contrast, if we do clear `tracesSeen`, the output looks like this:
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            // - 1 entry of B
+            // - (9 duplicate frames omitted)
+            // - 1 entry of A
+            // - (9 duplicate frames omitted)
+            //
+            // See: `tests/functional/lang/eval-fail-mutual-recursion.nix`
+            tracesSeen.clear();
+        }
+    }
+    // We've either printed each trace in `skippedTraces` normally, or
+    // printed a chunk of `duplicate frames omitted`. Either way, we've
+    // processed these traces and can clear them.
+    skippedTraces.clear();
+}
+
 std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool showTrace)
 {
    std::string prefix;
@ -333,7 +422,13 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s

    bool frameOnly = false;
    if (!einfo.traces.empty()) {
+        // Stack traces seen since we last printed a chunk of `duplicate frames
+        // omitted`.
+        std::set<Trace> tracesSeen;
+        // A consecutive sequence of stack traces that are all in `tracesSeen`.
+        std::vector<Trace> skippedTraces;
        size_t count = 0;
+
        for (const auto & trace : einfo.traces) {
            if (trace.hint.str().empty()) continue;
            if (frameOnly && !trace.frame) continue;
@ -343,14 +438,21 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s
                break;
            }

+            if (tracesSeen.count(trace)) {
+                skippedTraces.push_back(trace);
+                continue;
+            }
+            tracesSeen.insert(trace);
+
+            printSkippedTracesMaybe(oss, ellipsisIndent, count, skippedTraces, tracesSeen);
+
            count++;
            frameOnly = trace.frame;

-            oss << "\n" << "… " << trace.hint.str() << "\n";
-
-            if (printPosMaybe(oss, ellipsisIndent, trace.pos))
-                count++;
+            printTrace(oss, ellipsisIndent, count, trace);
        }
+
+        printSkippedTracesMaybe(oss, ellipsisIndent, count, skippedTraces, tracesSeen);
        oss << "\n" << prefix;
    }

@ -369,4 +471,5 @@ std::ostream & showErrorInfo(std::ostream & out, const ErrorInfo & einfo, bool s

    return out;
 }
+
 }
--- a/src/libutil/error.hh
+++ b/src/libutil/error.hh
@ -25,6 +25,7 @@
 #include <memory>
 #include <map>
 #include <optional>
+#include <compare>

 #include <sys/types.h>
 #include <sys/stat.h>
@ -88,6 +89,8 @@ struct AbstractPos
    std::optional<LinesOfCode> getCodeLines() const;

    virtual ~AbstractPos() = default;
+
+    inline auto operator<=>(const AbstractPos& rhs) const = default;
 };

 std::ostream & operator << (std::ostream & str, const AbstractPos & pos);
@ -103,6 +106,11 @@ struct Trace {
    bool frame;
 };

+inline bool operator<(const Trace& lhs, const Trace& rhs);
+inline bool operator> (const Trace& lhs, const Trace& rhs);
+inline bool operator<=(const Trace& lhs, const Trace& rhs);
+inline bool operator>=(const Trace& lhs, const Trace& rhs);
+
 struct ErrorInfo {
    Verbosity level;
    hintformat msg;
--- a/tests/functional/lang/eval-fail-duplicate-traces.err.exp
+++ b/tests/functional/lang/eval-fail-duplicate-traces.err.exp
@ -0,0 +1,44 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:9:3:
+            8| in
+            9|   throwAfter 2
+             |   ^
+           10|
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:6:10:
+            5|     if n > 0
+            6|     then throwAfter (n - 1)
+             |          ^
+            7|     else throw "Uh oh!";
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-duplicate-traces.nix:6:10:
+            5|     if n > 0
+            6|     then throwAfter (n - 1)
+             |          ^
+            7|     else throw "Uh oh!";
+
+       … while calling 'throwAfter'
+         at /pwd/lang/eval-fail-duplicate-traces.nix:4:16:
+            3| let
+            4|   throwAfter = n:
+             |                ^
+            5|     if n > 0
+
+       error: Uh oh!
--- a/tests/functional/lang/eval-fail-duplicate-traces.nix
+++ b/tests/functional/lang/eval-fail-duplicate-traces.nix
@ -0,0 +1,9 @@
+# Check that we only omit duplicate stack traces when there's a bunch of them.
+# Here, there's only a couple duplicate entries, so we output them all.
+let
+  throwAfter = n:
+    if n > 0
+    then throwAfter (n - 1)
+    else throw "Uh oh!";
+in
+  throwAfter 2
--- a/tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp
+++ b/tests/functional/lang/eval-fail-infinite-recursion-lambda.err.exp
@ -0,0 +1,38 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:1:
+            1| (x: x x) (x: x x)
+             | ^
+            2|
+
+       … while calling anonymous lambda
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:2:
+            1| (x: x x) (x: x x)
+             |  ^
+            2|
+
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:5:
+            1| (x: x x) (x: x x)
+             |     ^
+            2|
+
+       … while calling anonymous lambda
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:11:
+            1| (x: x x) (x: x x)
+             |           ^
+            2|
+
+       … from call site
+         at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+            2|
+
+       (19997 duplicate frames omitted)
+
+       error: stack overflow; max-call-depth exceeded
+       at /pwd/lang/eval-fail-infinite-recursion-lambda.nix:1:14:
+            1| (x: x x) (x: x x)
+             |              ^
+            2|
--- a/tests/functional/lang/eval-fail-infinite-recursion-lambda.nix
+++ b/tests/functional/lang/eval-fail-infinite-recursion-lambda.nix
@ -0,0 +1 @@
+(x: x x) (x: x x)
--- a/tests/functional/lang/eval-fail-mutual-recursion.err.exp
+++ b/tests/functional/lang/eval-fail-mutual-recursion.err.exp
@ -0,0 +1,57 @@
+error:
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:36:3:
+           35| in
+           36|   throwAfterA true 10
+             |   ^
+           37|
+
+       … while calling 'throwAfterA'
+         at /pwd/lang/eval-fail-mutual-recursion.nix:29:26:
+           28|
+           29|   throwAfterA = recurse: n:
+             |                          ^
+           30|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:31:10:
+           30|     if n > 0
+           31|     then throwAfterA recurse (n - 1)
+             |          ^
+           32|     else if recurse
+
+       (19 duplicate frames omitted)
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:33:10:
+           32|     else if recurse
+           33|     then throwAfterB true 10
+             |          ^
+           34|     else throw "Uh oh!";
+
+       … while calling 'throwAfterB'
+         at /pwd/lang/eval-fail-mutual-recursion.nix:22:26:
+           21| let
+           22|   throwAfterB = recurse: n:
+             |                          ^
+           23|     if n > 0
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:24:10:
+           23|     if n > 0
+           24|     then throwAfterB recurse (n - 1)
+             |          ^
+           25|     else if recurse
+
+       (19 duplicate frames omitted)
+
+       … from call site
+         at /pwd/lang/eval-fail-mutual-recursion.nix:26:10:
+           25|     else if recurse
+           26|     then throwAfterA false 10
+             |          ^
+           27|     else throw "Uh oh!";
+
+       (21 duplicate frames omitted)
+
+       error: Uh oh!
--- a/tests/functional/lang/eval-fail-mutual-recursion.nix
+++ b/tests/functional/lang/eval-fail-mutual-recursion.nix
@ -0,0 +1,36 @@
+# Check that stack frame deduplication only affects consecutive intervals, and
+# that they are reported independently of any preceding sections, even if
+# they're indistinguishable.
+#
+# In terms of the current implementation, we check that we clear the set of
+# "seen frames" after eliding a group of frames.
+#
+# Suppose we have:
+# - 10 frames in a function A
+# - 10 frames in a function B
+# - 10 frames in a function A
+#
+# We want to output:
+# - a few frames of A (skip the rest)
+# - a few frames of B (skip the rest)
+# - a few frames of A (skip the rest)
+#
+# If we implemented this in the naive manner, we'd instead get:
+# - a few frames of A (skip the rest)
+# - a few frames of B (skip the rest, _and_ skip the remaining frames of A)
+let
+  throwAfterB = recurse: n:
+    if n > 0
+    then throwAfterB recurse (n - 1)
+    else if recurse
+    then throwAfterA false 10
+    else throw "Uh oh!";
+
+  throwAfterA = recurse: n:
+    if n > 0
+    then throwAfterA recurse (n - 1)
+    else if recurse
+    then throwAfterB true 10
+    else throw "Uh oh!";
+in
+  throwAfterA true 10