Compare commits

...

10 Commits

Author SHA1 Message Date
Eelco Dolstra a10da8466f * Option to turn off position information to test the impact on
maximal sharing.
2007-10-17 12:36:37 +00:00
Eelco Dolstra e23d134b85 * Memoize the substitution function.
* Print some substitution statistics.
* Option to turn off the closed term optimization.
2007-10-15 12:08:31 +00:00
Eelco Dolstra c3a79daaf3 * Short-circuiting of function call evaluation.
With maximal laziness, you would expect that a function like this

    fib = n:
      if n == 0 then 0 else
      if n == 1 then 1 else
      builtins.add (fib (builtins.sub n 1)) (fib (builtins.sub n 2));

  can be evaluated efficiently, because maximal laziness should
  implicitly memoize the recursive calls to "fib".  However, non-strictness
  interferes with this: the argument "n" is generally not in a form
  that allows the memoization to work (e.g., it will be something like
  (20 - 1 - 2 - 2) rather than 15).  By the time that "n" is
  evaluated (in "if n == 0 ..."), we're already deep in the evaluation
  of the call.

  (Strictness solves this:

      builtins.add (strict fib (builtins.sub n 1)) (strict fib (builtins.sub n 2));

  but that's not a very nice approach.)

  With short-circuiting, the evaluator will check after evaluating a
  term, whether that term is the argument of a function call that
  we're currently evaluating.  If so, it will check to see if the same
  call but with the evaluated argument is in the normal form cache.

  For instance, after evaluating (20 - 1 - 2 - 2) to 15, if we see
  that "fib (20 - 1 - 2 - 2)" is currently being evaluated, we check
  to see if "fib 15" is in the normal form cache.  If so, we unwind
  the stack (by throwing an exception) up to the evalExpr call
  responsible for "fib (20 - 1 - 2 - 2)", which can then immediately
  return the normal form for "fib 15".  And indeed this makes "fib"
  run in O(n) time.

  The overhead for checking the active function calls (which isn't
  very smart yet) seems to be modest, about 2% for "nix-env -qa
  --drv-path --out-path" on Nixpkgs.
2007-10-12 17:53:47 +00:00
Eelco Dolstra 74ce938e18 * Simulate conventional laziness a bit better still by "allocating"
all local variables when entering a new scope.  I.e., don't do
  implicit let-floating.
2007-10-11 22:42:09 +00:00
Eelco Dolstra 981afe821c * Some hacks to simulate conventional laziness. 2007-10-11 21:58:37 +00:00
Eelco Dolstra cd9d10d4e3 * Caching of parse results for fairer comparisons. 2007-10-11 20:02:08 +00:00
Eelco Dolstra c1179badd5 * Playing with strictness. 2007-10-11 14:07:00 +00:00
Eelco Dolstra 3d14ed9270 * A primop for calling functions strictly (i.e. forcing evaluation of
argument).  Necessary to actually get memoisation of functions like
  "fib" with maximal laziness.
2007-10-11 14:06:43 +00:00
Eelco Dolstra 8e0488370d * Environment variable to disable normal form caching. 2007-10-11 12:09:06 +00:00
Eelco Dolstra b57f8bd38d * PEPM related hacks. 2007-10-11 12:07:49 +00:00
7 changed files with 346 additions and 48 deletions

View File

@ -1,3 +1,5 @@
#include <iostream>
#include "eval.hh"
#include "parser.hh"
#include "hash.hh"
@ -13,6 +15,19 @@
namespace nix {
/* Normal form caching mode, read from NIX_TERM_CACHE in the EvalState
   constructor (falls back to 1 when the variable is not an integer):
   0 = don't cache, 1 = cache, 2 = cache "cell" terms only. */
int cacheTerms;
/* Whether short-circuiting of function call evaluation is enabled;
   true only when NIX_SHORT_CIRCUIT is "1". */
bool shortCircuit;
/* The three switches below default to on and are turned off by setting
   NIX_CLOSED_TERMS, NIX_SUBST_CACHE or NIX_POS_INFO to anything other
   than "1". */
bool closedTerms; // don't substitute under terms known to be closed
bool substCache; // memoization of the term substitution function
bool posInfo; // attach position info to functions, assertions, attributes
/* Fixed capacity of the active call stack; evalExpr asserts that the
   stack is not full before pushing. */
#define maxActiveCalls 4096
/* Call expressions whose evaluation is currently in progress.  evalExpr
   pushes a call here before evaluating it and pops it afterwards;
   maybeShortCircuit scans the entries.  The array is registered with
   ATprotectMemory in the EvalState constructor. */
ATerm activeCalls[maxActiveCalls];
/* Number of entries of activeCalls currently in use. */
unsigned int activeCallsCount = 0;
EvalState::EvalState()
@ -23,6 +38,15 @@ EvalState::EvalState()
initNixExprHelpers();
addPrimOps();
if (!string2Int(getEnv("NIX_TERM_CACHE"), cacheTerms)) cacheTerms = 1;
shortCircuit = getEnv("NIX_SHORT_CIRCUIT", "0") == "1";
strictMode = getEnv("NIX_STRICT", "0") == "1";
closedTerms = getEnv("NIX_CLOSED_TERMS", "1") == "1";
substCache = getEnv("NIX_SUBST_CACHE", "1") == "1";
posInfo = getEnv("NIX_POS_INFO", "1") == "1";
ATprotectMemory(activeCalls, maxActiveCalls);
}
@ -69,6 +93,19 @@ LocalNoInline(void addErrorPrefix(Error & e, const char * s, const string & s2,
}
/* Try to evaluate `e' ahead of time.  Outside strict mode the term is
   handed back untouched.  In strict mode the evaluated term is
   returned, unless evaluation raises an EvalError, in which case the
   original term is returned so that laziness can deal with it. */
Expr speculativeEval(EvalState & state, Expr e)
{
    if (state.strictMode) {
        try {
            Expr forced = evalExpr(state, e);
            return forced;
        } catch (EvalError & ignored) {
            /* Swallow the error and fall through to return the
               unevaluated argument. */
        }
    }
    return e;
}
/* Substitute an argument set into the body of a function. */
static Expr substArgs(EvalState & state,
Expr body, ATermList formals, Expr arg)
@ -80,7 +117,7 @@ static Expr substArgs(EvalState & state,
ATermMap args;
queryAllAttrs(arg, args);
for (ATermMap::const_iterator i = args.begin(); i != args.end(); ++i)
subs.set(i->key, i->value);
subs.set(i->key, speculativeEval(state, i->value));
/* Get the formal arguments. */
ATermVector defsUsed;
@ -389,7 +426,7 @@ Expr autoCallFunction(Expr e, const ATermMap & args)
Expr name, def, value; ATerm values, def2;
if (!matchFormal(*i, name, values, def2)) abort();
if ((value = args.get(name)))
actualArgs.set(name, makeAttrRHS(value, makeNoPos()));
actualArgs.set(name, makeAttrRHS(allocCell(value), makeNoPos()));
else if (!matchDefaultValue(def2, def))
throw TypeError(format("cannot auto-call a function that has an argument without a default value (`%1%')")
% aterm2String(name));
@ -457,7 +494,7 @@ LocalNoInline(Expr evalCall(EvalState & state, Expr fun, Expr arg))
else if (matchFunction(fun, formals, body, pos)) {
arg = evalExpr(state, arg);
try {
return evalExpr(state, substArgs(state, body, formals, arg));
return evalExpr(state, substArgs(state, allocCells(body), formals, arg));
} catch (Error & e) {
addErrorPrefix(e, "while evaluating the function at %1%:\n",
showPos(pos));
@ -467,9 +504,10 @@ LocalNoInline(Expr evalCall(EvalState & state, Expr fun, Expr arg))
else if (matchFunction1(fun, name, body, pos)) {
try {
arg = speculativeEval(state, arg);
ATermMap subs(1);
subs.set(name, arg);
return evalExpr(state, substitute(Substitution(0, &subs), body));
subs.set(name, allocCell(arg));
return evalExpr(state, substitute(Substitution(0, &subs), allocCells(body)));
} catch (Error & e) {
addErrorPrefix(e, "while evaluating the function at %1%:\n",
showPos(pos));
@ -624,6 +662,10 @@ Expr evalExpr2(EvalState & state, Expr e)
Expr e1, e2, e3;
ATerm name, pos;
int bla;
if (matchCell(e, bla, e1)) e = e1;
AFun sym = ATgetAFun(e);
/* Normal forms. */
@ -715,41 +757,141 @@ Expr evalExpr2(EvalState & state, Expr e)
if (matchOpConcat(e, e1, e2)) return evalOpConcat(state, e1, e2);
/* Barf. */
//printMsg(lvlError, format("%1%") % e);
abort();
}
/* Empty exception type used purely for control flow: it is thrown by
   maybeShortCircuit() and caught in evalExpr() to unwind the stack up
   to an active function call. */
class ShortCircuit
{
};
/* Counts how many times maybeShortCircuit() has thrown ShortCircuit;
   printEvalStats() prints it with the "FNORD" label. */
unsigned int fnord;
/* Called after `e' has been evaluated to `nf'.  Walks the active call
   stack looking for a call whose argument is exactly `e'; if the same
   function applied to `nf' already has an entry in the normal form
   cache, bumps the `fnord' counter and throws ShortCircuit.  Aborts if
   an entry on the active call stack is not a call term. */
void maybeShortCircuit(EvalState & state, Expr e, Expr nf)
{
    for (unsigned int idx = 0; idx < activeCallsCount; ++idx) {
        Expr callee, callArg;
        bool isCall = matchCall(activeCalls[idx], callee, callArg);
        if (!isCall) abort();

        /* Only calls waiting on this very argument are of interest. */
        if (callArg != e) continue;

        Expr known = state.normalForms.get(makeCall(callee, nf));
        if (!known) continue;

        fnord++;
        throw ShortCircuit();
    }
}
Expr evalExpr(EvalState & state, Expr e)
{
checkInterrupt();
#if 0
#if 1
startNest(nest, lvlVomit,
format("evaluating expression: %1%") % e);
#endif
state.nrEvaluated++;
if (cacheTerms == 0) return evalExpr2(state, e);
if (cacheTerms == 2) {
int pseudoAddr;
Expr e2;
if (!matchCell(e, pseudoAddr, e2)) return evalExpr2(state, e);
}
/* Consult the memo table to quickly get the normal form of
previously evaluated expressions. */
Expr nf = state.normalForms.get(e);
if (nf) {
if (nf == makeBlackHole())
throwEvalError("infinite recursion encountered");
//if (nf == makeBlackHole())
// throwEvalError("infinite recursion encountered");
state.nrCached++;
return nf;
}
/* Otherwise, evaluate and memoize. */
state.normalForms.set(e, makeBlackHole());
try {
nf = evalExpr2(state, e);
} catch (Error & err) {
state.normalForms.remove(e);
throw;
Expr fun, arg;
if (shortCircuit && matchCall(e, fun, arg)) {
#if 0
Expr arg2 = state.normalForms.get(arg);
if (arg2) { /* the evaluated argument is now known */
//printMsg(lvlError, "foo");
/* do we know the result of the same function called
with the evaluated argument? */
Expr res = state.normalForms.get(makeCall(fun, arg2));
if (res) { /* woohoo! */
printMsg(lvlError, "dingdong");
state.normalForms.set(e, res);
return res;
}
}
#endif
assert(activeCallsCount < maxActiveCalls);
activeCalls[activeCallsCount++] = e;
//state.normalForms.set(e, makeBlackHole());
try {
nf = evalExpr2(state, e);
}
catch (ShortCircuit & exception) {
//printMsg(lvlError, "catch!");
Expr arg2 = state.normalForms.get(arg);
if (arg2) { /* the evaluated argument is now known */
/* do we know the result of the same function called
with the evaluated argument? */
Expr res = state.normalForms.get(makeCall(fun, arg2));
if (res) { /* woohoo! */
//printMsg(lvlError, "woohoo!");
//printMsg(lvlError, format("woohoo! %1% %2% %3% %4%") % fun % arg % arg2 % res);
activeCallsCount--;
state.normalForms.set(e, res);
maybeShortCircuit(state, e, res);
return res;
}
}
activeCallsCount--;
state.normalForms.remove(e);
throw; /* not for us */
}
catch (...) {
activeCallsCount--;
state.normalForms.remove(e);
throw;
}
activeCallsCount--;
state.normalForms.set(e, nf);
Expr arg2 = state.normalForms.get(arg);
if (arg2) state.normalForms.set(makeCall(fun, arg2), nf);
maybeShortCircuit(state, e, nf);
return nf;
}
else {
/* Otherwise, evaluate and memoize. */
//state.normalForms.set(e, makeBlackHole());
try {
nf = evalExpr2(state, e);
} catch (...) {
state.normalForms.remove(e);
throw;
}
state.normalForms.set(e, nf);
if (shortCircuit) maybeShortCircuit(state, e, nf);
return nf;
}
state.normalForms.set(e, nf);
return nf;
}
@ -845,16 +987,24 @@ extern "C" {
unsigned long AT_calcAllocatedSize();
}
unsigned int substs = 0;
unsigned int substsCached = 0;
void printEvalStats(EvalState & state)
{
char x;
bool showStats = getEnv("NIX_SHOW_STATS", "0") != "0";
printMsg(lvlError, format("FNORD %1%") % fnord);
printMsg(showStats ? lvlInfo : lvlDebug,
format("evaluated %1% expressions, %2% cache hits, %3%%% efficiency, used %4% ATerm bytes, used %5% bytes of stack space")
format("evaluated %1% expressions, %2% cache hits, %3%%% efficiency, used %4% ATerm bytes, used %5% bytes of stack space, %6% substitutions (%7% cached)")
% state.nrEvaluated % state.nrCached
% ((float) state.nrCached / (float) state.nrEvaluated * 100)
% AT_calcAllocatedSize()
% (&x - deepestStack));
% (&x - deepestStack)
% substs
% substsCached);
if (showStats)
printATermMapStats();
}

View File

@ -27,6 +27,9 @@ struct EvalState;
typedef Expr (* PrimOp) (EvalState &, const ATermVector & args);
extern int cacheTerms; // 0 = don't, 1 = do, 2 = "cell" terms only
struct EvalState
{
ATermMap normalForms;
@ -38,6 +41,10 @@ struct EvalState
unsigned int nrEvaluated;
unsigned int nrCached;
bool strictMode;
ATermMap parsings; /* path -> expr mapping */
EvalState();
void addPrimOps();

View File

@ -73,6 +73,8 @@ Inherit | Expr ATermList Pos | ATerm |
Scope | | Expr |
Cell | int Expr | Expr |
Formal | string ValidValues DefaultValue | ATerm |
ValidValues | ATermList | ValidValues |

View File

@ -2,6 +2,7 @@
#include "derivations.hh"
#include "util.hh"
#include "aterm.hh"
#include "eval.hh" // !!! urgh
#include "nixexpr-ast.hh"
#include "nixexpr-ast.cc"
@ -108,7 +109,16 @@ Expr makeAttrs(const ATermMap & attrs)
}
Expr substitute(const Substitution & subs, Expr e)
extern unsigned int substs;
extern unsigned int substsCached;
extern bool closedTerms;
extern bool substCache;
static Expr substitute(ATermMap & done, const Substitution & subs, Expr e);
static Expr substitute2(ATermMap & done, const Substitution & subs, Expr e)
{
checkInterrupt();
@ -116,19 +126,20 @@ Expr substitute(const Substitution & subs, Expr e)
ATerm name, pos, e2;
substs++;
/* As an optimisation, don't substitute in subterms known to be
closed. */
if (matchClosed(e, e2)) return e;
if (closedTerms && matchClosed(e, e2)) return e;
if (matchVar(e, name)) {
Expr sub = subs.lookup(name);
if (sub == makeRemoved()) sub = 0;
Expr wrapped;
/* Add a "closed" wrapper around terms that aren't already
closed. The check is necessary to prevent repeated
wrapping, e.g., closed(closed(closed(...))), which kills
caching. */
return sub ? (matchClosed(sub, wrapped) ? sub : makeClosed(sub)) : e;
return sub ? ((!closedTerms || matchClosed(sub, wrapped)) ? sub : makeClosed(sub)) : e;
}
/* In case of a function, filter out all variables bound by this
@ -140,18 +151,30 @@ Expr substitute(const Substitution & subs, Expr e)
for (ATermIterator i(formals); i; ++i) {
ATerm d1, d2;
if (!matchFormal(*i, name, d1, d2)) abort();
map.set(name, makeRemoved());
if (subs.lookup(name))
map.set(name, constRemoved);
}
if (map.size() == 0)
return makeFunction(
(ATermList) substitute(done, subs, (ATerm) formals),
substitute(done, subs, body), pos);
else {
Substitution subs2(&subs, &map);
ATermMap done2(128);
return makeFunction(
(ATermList) substitute(done2, subs2, (ATerm) formals),
substitute(done2, subs2, body), pos);
}
Substitution subs2(&subs, &map);
return makeFunction(
(ATermList) substitute(subs2, (ATerm) formals),
substitute(subs2, body), pos);
}
if (matchFunction1(e, name, body, pos)) {
ATermMap map(1);
map.set(name, makeRemoved());
return makeFunction1(name, substitute(Substitution(&subs, &map), body), pos);
if (subs.lookup(name)) {
ATermMap map(1);
map.set(name, constRemoved);
ATermMap done2(128);
return makeFunction1(name, substitute(done2, Substitution(&subs, &map), body), pos);
} else
return makeFunction1(name, substitute(done, subs, body), pos);
}
/* Idem for a mutually recursive attribute set. */
@ -159,14 +182,21 @@ Expr substitute(const Substitution & subs, Expr e)
if (matchRec(e, rbnds, nrbnds)) {
ATermMap map(ATgetLength(rbnds) + ATgetLength(nrbnds));
for (ATermIterator i(rbnds); i; ++i)
if (matchBind(*i, name, e2, pos)) map.set(name, makeRemoved());
else abort(); /* can't happen */
if (matchBind(*i, name, e2, pos) && subs.lookup(name))
map.set(name, constRemoved);
for (ATermIterator i(nrbnds); i; ++i)
if (matchBind(*i, name, e2, pos)) map.set(name, makeRemoved());
else abort(); /* can't happen */
return makeRec(
(ATermList) substitute(Substitution(&subs, &map), (ATerm) rbnds),
(ATermList) substitute(subs, (ATerm) nrbnds));
if (matchBind(*i, name, e2, pos) && subs.lookup(name))
map.set(name, constRemoved);
if (map.size() == 0)
return makeRec(
(ATermList) substitute(done, subs, (ATerm) rbnds),
(ATermList) substitute(done, subs, (ATerm) nrbnds));
else {
ATermMap done2(128);
return makeRec(
(ATermList) substitute(done2, Substitution(&subs, &map), (ATerm) rbnds),
(ATermList) substitute(done, subs, (ATerm) nrbnds));
}
}
if (ATgetType(e) == AT_APPL) {
@ -177,7 +207,73 @@ Expr substitute(const Substitution & subs, Expr e)
for (int i = 0; i < arity; ++i) {
ATerm arg = ATgetArgument(e, i);
args[i] = substitute(subs, arg);
args[i] = substitute(done, subs, arg);
if (args[i] != arg) changed = true;
}
return changed ? (ATerm) ATmakeApplArray(fun, args) : e;
}
if (ATgetType(e) == AT_LIST) {
unsigned int len = ATgetLength((ATermList) e);
ATerm es[len];
ATermIterator i((ATermList) e);
bool changed = false;
for (unsigned int j = 0; i; ++i, ++j) {
es[j] = substitute(done, subs, *i);
if (es[j] != *i) changed = true;
}
if (!changed) return e;
ATermList out = ATempty;
for (unsigned int j = len; j; --j)
out = ATinsert(out, es[j - 1]);
return (ATerm) out;
}
return e;
}
/* Memoizing front-end for substitute2().  `done' maps terms that were
   already substituted to their results.  A hit is only honoured when
   `substCache' is on (and is counted in `substsCached'); otherwise the
   term is substituted via substitute2() and the result is recorded in
   `done'. */
static Expr substitute(ATermMap & done, const Substitution & subs, Expr e)
{
    Expr remembered = done[e];

    if (!substCache || !remembered) {
        Expr fresh = substitute2(done, subs, e);
        done.set(e, fresh);
        return fresh;
    }

    substsCached++;
    return remembered;
}
/* Public entry point: apply `subs' to `e', starting from an empty memo
   table that lives only for the duration of this call. */
Expr substitute(const Substitution & subs, Expr e)
{
    ATermMap memo(256);
    Expr result = substitute(memo, subs, e);
    return result;
}
Expr allocCells(Expr e)
{
checkInterrupt();
ATerm e2;
if (matchClosed(e, e2)) return e;
int i;
if (matchCell(e, i, e2))
return allocCell(allocCells(e2));
if (ATgetType(e) == AT_APPL) {
AFun fun = ATgetAFun(e);
int arity = ATgetArity(fun);
ATerm args[arity];
bool changed = false;
for (int i = 0; i < arity; ++i) {
ATerm arg = ATgetArgument(e, i);
args[i] = allocCells(arg);
if (args[i] != arg) changed = true;
}
@ -189,7 +285,7 @@ Expr substitute(const Substitution & subs, Expr e)
ATerm es[len];
ATermIterator i((ATermList) e);
for (unsigned int j = 0; i; ++i, ++j)
es[j] = substitute(subs, *i);
es[j] = allocCells(*i);
ATermList out = ATempty;
for (unsigned int j = len; j; --j)
out = ATinsert(out, es[j - 1]);
@ -399,5 +495,18 @@ string showValue(Expr e)
return "<unknown>";
}
/* Number handed to the next cell created by allocCell(). */
static unsigned int cellCount = 0;


/* Wrap `e' in a freshly numbered Cell term.  The term is returned
   unchanged when cacheTerms is not 2, or when it already is a cell. */
Expr allocCell(Expr e)
{
    int existingId;
    Expr inner;
    bool cellsEnabled = cacheTerms == 2;

    if (!cellsEnabled || matchCell(e, existingId, inner))
        return e;

    Expr cell = makeCell(cellCount++, e);
    return cell;
}
}

View File

@ -34,6 +34,9 @@ typedef ATerm Pos;
typedef vector<ATerm> ATermVector;
extern Expr constRemoved;
/* A substitution is a linked list of ATermMaps that map names to
identifiers. We use a list of ATermMaps rather than a single to
make it easy to grow or shrink a substitution when entering a
@ -53,7 +56,8 @@ struct Substitution
{
Expr x;
for (const Substitution * s(this); s; s = s->prev)
if ((x = s->map->get(name))) return x;
if ((x = s->map->get(name)))
return x == constRemoved ? 0 : x;
return 0;
}
};
@ -116,6 +120,11 @@ string showType(Expr e);
string showValue(Expr e);
Expr allocCell(Expr e); // make an updateable cell (for simulating conventional laziness)
Expr allocCells(Expr e); // re-allocate all cells in e
}

View File

@ -57,7 +57,7 @@ static Expr fixAttrs(int recursive, ATermList as)
bool fromScope = matchScope(src);
for (ATermIterator j(names); j; ++j) {
Expr rhs = fromScope ? makeVar(*j) : makeSelect(src, *j);
*is = ATinsert(*is, makeBind(*j, rhs, pos));
*is = ATinsert(*is, makeBind(*j, allocCell(rhs), pos));
}
} else bs = ATinsert(bs, *i);
}
@ -70,11 +70,13 @@ static Expr fixAttrs(int recursive, ATermList as)
void backToString(yyscan_t scanner);
extern bool posInfo;
static Pos makeCurPos(YYLTYPE * loc, ParseData * data)
{
return makePos(toATerm(data->path),
loc->first_line, loc->first_column);
return posInfo ? makePos(toATerm(data->path),
loc->first_line, loc->first_column) : makeNoPos();
}
#define CUR_POS makeCurPos(yylocp, data)
@ -226,7 +228,7 @@ binds
bind
: ID '=' expr ';'
{ $$ = makeBind($1, $3, CUR_POS); }
{ $$ = makeBind($1, allocCell($3), CUR_POS); }
| INHERIT inheritsrc ids ';'
{ $$ = makeInherit($2, $3, CUR_POS); }
;
@ -383,8 +385,13 @@ Expr parseExprFromFile(EvalState & state, Path path)
if (S_ISDIR(st.st_mode))
path = canonPath(path + "/default.nix");
Expr cached = state.parsings.get(toATerm(path));
if (cached) return cached;
/* Read and parse the input file. */
return parse(state, readFile(path).c_str(), path, dirOf(path));
cached = parse(state, readFile(path).c_str(), path, dirOf(path));
state.parsings.set(toATerm(path), cached);
return cached;
}

View File

@ -564,7 +564,7 @@ static Expr prim_derivationLazy(EvalState & state, const ATermVector & args)
attrs.set(toATerm("type"),
makeAttrRHS(makeStr("derivation"), makeNoPos()));
Expr drvStrict = makeCall(makeVar(toATerm("derivation!")), eAttrs);
Expr drvStrict = allocCell(makeCall(makeVar(toATerm("derivation!")), eAttrs));
attrs.set(toATerm("outPath"),
makeAttrRHS(makeSelect(drvStrict, toATerm("outPath")), makeNoPos()));
@ -773,7 +773,7 @@ static Expr prim_listToAttrs(EvalState & state, const ATermVector & args)
Expr e = evalExpr(state, makeSelect(evaledExpr, toATerm("name")));
string attr = evalStringNoCtx(state,e);
Expr r = makeSelect(evaledExpr, toATerm("value"));
res.set(toATerm(attr), makeAttrRHS(r, makeNoPos()));
res.set(toATerm(attr), makeAttrRHS(allocCell(r), makeNoPos()));
}
else
throw TypeError(format("list element in `listToAttrs' is %s, expected a set { name = \"<name>\"; value = <value>; }")
@ -927,6 +927,17 @@ static Expr prim_stringLength(EvalState & state, const ATermVector & args)
}
/*************************************************************
* Strictness
*************************************************************/
/* Primop `strict f x': evaluate the argument (args[1]) first, then
   evaluate the call of args[0] on that evaluated argument. */
static Expr prim_strict(EvalState & state, const ATermVector & args)
{
    Expr forcedArg = evalExpr(state, args[1]);
    Expr call = makeCall(args[0], forcedArg);
    return evalExpr(state, call);
}
/*************************************************************
* Primop registration
*************************************************************/
@ -993,6 +1004,9 @@ void EvalState::addPrimOps()
addPrimOp("toString", 1, prim_toString);
addPrimOp("__substring", 3, prim_substring);
addPrimOp("__stringLength", 1, prim_stringLength);
// Strictness
addPrimOp("strict", 2, prim_strict);
}