Fix 'deadlock: trying to re-acquire self-held lock'

This was caused by derivations with 'allowSubstitutes = false'. Such
derivations will be built locally. However, if there is another
SubstitionGoal that has the output of the first derivation in its
closure, then the path will be simultaneously built and substituted.

There was a check to catch this situation (via pathIsLockedByMe()),
but it no longer worked reliably because substitutions are now done in
another thread. (Thus the comment 'It can't happen between here and
the lockPaths() call below because we're not allowing multi-threading'
was no longer valid.)

The fix is to handle the path already being locked in both
SubstitutionGoal and DerivationGoal.
This commit is contained in:
Eelco Dolstra 2018-02-12 16:56:12 +01:00
parent 35fd31770c
commit 4f09ce7940
No known key found for this signature in database
GPG key ID: 8170B4726D7198DE
4 changed files with 24 additions and 22 deletions

View file

@ -1335,19 +1335,6 @@ void DerivationGoal::tryToBuild()
{
trace("trying to build");
/* Check for the possibility that some other goal in this process
has locked the output since we checked in haveDerivation().
(It can't happen between here and the lockPaths() call below
because we're not allowing multi-threading.) If so, put this
goal to sleep until another goal finishes, then try again. */
for (auto & i : drv->outputs)
if (pathIsLockedByMe(worker.store.toRealPath(i.second.path))) {
debug(format("putting derivation '%1%' to sleep because '%2%' is locked by another goal")
% drvPath % i.second.path);
worker.waitForAnyGoal(shared_from_this());
return;
}
/* Obtain locks on all output paths. The locks are automatically
released when we exit this function or Nix crashes. If we
can't acquire the lock, then continue; hopefully some other
@ -3739,6 +3726,17 @@ void SubstitutionGoal::tryToRun()
return;
}
/* If the store path is already locked (probably by a
DerivationGoal), then put this goal to sleep. Note: we don't
acquire a lock here since that breaks addToStore(), so below we
handle an AlreadyLocked exception from addToStore(). The check
here is just an optimisation to prevent having to redo a
download due to a locked path. */
if (pathIsLockedByMe(worker.store.toRealPath(storePath))) {
worker.waitForAWhile(shared_from_this());
return;
}
maintainRunningSubstitutions = std::make_unique<MaintainCount<uint64_t>>(worker.runningSubstitutions);
worker.updateProgress();
@ -3778,6 +3776,12 @@ void SubstitutionGoal::finished()
try {
promise.get_future().get();
} catch (AlreadyLocked & e) {
/* Probably a DerivationGoal is already building this store
path. Sleep for a while and try again. */
state = &SubstitutionGoal::init;
worker.waitForAWhile(shared_from_this());
return;
} catch (Error & e) {
printError(e.msg());

View file

@ -992,8 +992,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, const ref<std::string> &
/* Lock the output path. But don't lock if we're being called
from a build hook (whose parent process already acquired a
lock on this path). */
Strings locksHeld = tokenizeString<Strings>(getEnv("NIX_HELD_LOCKS"));
if (find(locksHeld.begin(), locksHeld.end(), info.path) == locksHeld.end())
static auto locksHeld = tokenizeString<PathSet>(getEnv("NIX_HELD_LOCKS"));
if (!locksHeld.count(info.path))
outputLock.lockPaths({realPath});
if (repair || !isValidPath(info.path)) {

View file

@ -113,8 +113,10 @@ bool PathLocks::lockPaths(const PathSet & _paths,
{
auto lockedPaths(lockedPaths_.lock());
if (lockedPaths->count(lockPath))
throw Error("deadlock: trying to re-acquire self-held lock '%s'", lockPath);
if (lockedPaths->count(lockPath)) {
if (!wait) return false;
throw AlreadyLocked("deadlock: trying to re-acquire self-held lock '%s'", lockPath);
}
lockedPaths->insert(lockPath);
}

View file

@ -2,10 +2,8 @@
#include "util.hh"
namespace nix {
/* Open (possibly create) a lock file and return the file descriptor.
-1 is returned if create is false and the lock could not be opened
because it doesn't exist. Any other error throws an exception. */
@ -18,6 +16,7 @@ enum LockType { ltRead, ltWrite, ltNone };
bool lockFile(int fd, LockType lockType, bool wait);
MakeError(AlreadyLocked, Error);
class PathLocks
{
@ -38,9 +37,6 @@ public:
void setDeletion(bool deletePaths);
};
// FIXME: not thread-safe!
bool pathIsLockedByMe(const Path & path);
}