From 25c889baacd6a8b9b66ce4776ec729a40e39cf77 Mon Sep 17 00:00:00 2001 From: Mel Zuser Date: Thu, 11 Jan 2024 14:40:54 -0800 Subject: [PATCH] Fix performance of builtins.substring for empty substrings When returning a 0-length substring, avoid calling coerceToString, since it returns a string_view with the string's length, which is expensive to compute for large strings. --- src/libexpr/primops.cc | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index ee07e5568..c08aea898 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -3712,9 +3712,6 @@ static RegisterPrimOp primop_toString({ static void prim_substring(EvalState & state, const PosIdx pos, Value * * args, Value & v) { int start = state.forceInt(*args[0], pos, "while evaluating the first argument (the start offset) passed to builtins.substring"); - int len = state.forceInt(*args[1], pos, "while evaluating the second argument (the substring length) passed to builtins.substring"); - NixStringContext context; - auto s = state.coerceToString(pos, *args[2], context, "while evaluating the third argument (the string) passed to builtins.substring"); if (start < 0) state.debugThrowLastTrace(EvalError({ @@ -3722,6 +3719,22 @@ static void prim_substring(EvalState & state, const PosIdx pos, Value * * args, .errPos = state.positions[pos] })); + + int len = state.forceInt(*args[1], pos, "while evaluating the second argument (the substring length) passed to builtins.substring"); + + // Special-case on empty substring to avoid O(n) strlen + // This allows for the use of empty substrings to efficently capture string context + if (len == 0) { + state.forceValue(*args[2], pos); + if (args[2]->type() == nString) { + v.mkString("", args[2]->context()); + return; + } + } + + NixStringContext context; + auto s = state.coerceToString(pos, *args[2], context, "while evaluating the third argument (the string) passed to builtins.substring"); + v.mkString((unsigned int) start >= s->size() ? "" : s->substr(start, len), context); }