Propagate remote timeouts properly

This commit is contained in:
Eelco Dolstra 2014-08-17 19:09:03 +02:00
parent c6e85ee474
commit 42e9ad8fd1
2 changed files with 38 additions and 31 deletions

View file

@ -258,13 +258,8 @@ writeInt($maxSilentTime, $to);
writeInt($buildTimeout, $to);
my $res = readInt($from);
if ($res != 0) {
# Note that if we get exit code 100 from `nix-store -r', it
# denotes a permanent build failure (as opposed to an SSH problem
# or a temporary Nix problem). We propagate this to the caller to
# allow it to distinguish between transient and permanent
# failures.
my $msg = readString($from);
print STDERR "error: $msg (on `$hostName')\n";
print STDERR "error: $msg on `$hostName'\n";
exit $res;
}

View file

@ -238,6 +238,9 @@ public:
failure). */
bool permanentFailure;
/* Set if at least one derivation had a timeout. */
bool timedOut;
LocalStore & store;
std::shared_ptr<HookInstance> hook;
@ -1440,33 +1443,39 @@ void DerivationGoal::buildDone()
outputLocks.unlock();
buildUser.release();
/* When using a build hook, the hook will return a remote
build failure using exit code 100. Anything else is a hook
problem. */
bool hookError = hook &&
(!WIFEXITED(status) || WEXITSTATUS(status) != 100);
if (settings.printBuildTrace) {
if (hook && hookError)
printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
% drvPath % status % e.msg());
else
printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
% drvPath % 1 % e.msg());
if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) {
if (settings.printBuildTrace)
printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath);
worker.timedOut = true;
}
/* Register the outputs of this build as "failed" so we won't
try to build them again (negative caching). However, don't
do this for fixed-output derivations, since they're likely
to fail for transient reasons (e.g., fetchurl not being
able to access the network). Hook errors (like
communication problems with the remote machine) shouldn't
be cached either. */
if (settings.cacheFailure && !hookError && !fixedOutput)
foreach (DerivationOutputs::iterator, i, drv.outputs)
worker.store.registerFailedPath(i->second.path);
else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) {
if (settings.printBuildTrace)
printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
% drvPath % status % e.msg());
}
else {
if (settings.printBuildTrace)
printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
% drvPath % 1 % e.msg());
worker.permanentFailure = !fixedOutput && !diskFull;
/* Register the outputs of this build as "failed" so we
won't try to build them again (negative caching).
However, don't do this for fixed-output derivations,
since they're likely to fail for transient reasons
(e.g., fetchurl not being able to access the network).
Hook errors (like communication problems with the
remote machine) shouldn't be cached either. */
if (/* settings.cacheFailure && */ !fixedOutput && !diskFull)
{
printMsg(lvlError, "REG");
foreach (DerivationOutputs::iterator, i, drv.outputs)
worker.store.registerFailedPath(i->second.path);
}
}
worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
amDone(ecFailed);
return;
}
@ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store)
nrLocalBuilds = 0;
lastWokenUp = 0;
permanentFailure = false;
timedOut = false;
}
@ -3220,6 +3230,7 @@ void Worker::waitForInput()
format("%1% timed out after %2% seconds of silence")
% goal->getName() % settings.maxSilentTime);
goal->cancel(true);
timedOut = true;
}
else if (goal->getExitCode() == Goal::ecBusy &&
@ -3231,6 +3242,7 @@ void Worker::waitForInput()
format("%1% timed out after %2% seconds")
% goal->getName() % settings.buildTimeout);
goal->cancel(true);
timedOut = true;
}
}
@ -3247,7 +3259,7 @@ void Worker::waitForInput()
unsigned int Worker::exitStatus()
{
return permanentFailure ? 100 : 1;
return timedOut ? 101 : (permanentFailure ? 100 : 1);
}