Use BSD instead of POSIX file locks

POSIX file locks are essentially incompatible with multithreading. BSD locks have much saner semantics. We need this now that there can be multiple concurrent LocalStore::buildPaths() invocations.
2018-10-04 15:03:03 +02:00 · 2018-10-04 15:03:03 +02:00 · e349f2c0a3
parent ec415d7166
commit e349f2c0a3
5 changed files with 45 additions and 107 deletions
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@ -3984,17 +3984,6 @@ void SubstitutionGoal::tryToRun()
        return;
    }

-    /* If the store path is already locked (probably by a
-       DerivationGoal), then put this goal to sleep. Note: we don't
-       acquire a lock here since that breaks addToStore(), so below we
-       handle an AlreadyLocked exception from addToStore(). The check
-       here is just an optimisation to prevent having to redo a
-       download due to a locked path. */
-    if (pathIsLockedByMe(worker.store.toRealPath(storePath))) {
-        worker.waitForAWhile(shared_from_this());
-        return;
-    }
-
    maintainRunningSubstitutions = std::make_unique<MaintainCount<uint64_t>>(worker.runningSubstitutions);
    worker.updateProgress();

@ -4034,12 +4023,6 @@ void SubstitutionGoal::finished()

    try {
        promise.get_future().get();
-    } catch (AlreadyLocked & e) {
-        /* Probably a DerivationGoal is already building this store
-           path. Sleep for a while and try again. */
-        state = &SubstitutionGoal::init;
-        worker.waitForAWhile(shared_from_this());
-        return;
    } catch (std::exception & e) {
        printError(e.what());

--- a/src/libstore/gc.cc
+++ b/src/libstore/gc.cc
@ -29,7 +29,7 @@ static string gcRootsDir = "gcroots";
   read.  To be precise: when they try to create a new temporary root
   file, they will block until the garbage collector has finished /
   yielded the GC lock. */
-int LocalStore::openGCLock(LockType lockType)
+AutoCloseFD LocalStore::openGCLock(LockType lockType)
 {
    Path fnGCLock = (format("%1%/%2%")
        % stateDir % gcLockName).str();
@ -49,7 +49,7 @@ int LocalStore::openGCLock(LockType lockType)
       process that can open the file for reading can DoS the
       collector. */

-    return fdGCLock.release();
+    return fdGCLock;
 }


--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@ -263,7 +263,7 @@ private:
    bool isActiveTempFile(const GCState & state,
        const Path & path, const string & suffix);

-    int openGCLock(LockType lockType);
+    AutoCloseFD openGCLock(LockType lockType);

    void findRoots(const Path & path, unsigned char type, Roots & roots);

--- a/src/libstore/pathlocks.cc
+++ b/src/libstore/pathlocks.cc
@ -7,7 +7,7 @@

 #include <sys/types.h>
 #include <sys/stat.h>
-#include <fcntl.h>
+#include <sys/file.h>


 namespace nix {
@ -40,17 +40,14 @@ void deleteLockFile(const Path & path, int fd)

 bool lockFile(int fd, LockType lockType, bool wait)
 {
-    struct flock lock;
-    if (lockType == ltRead) lock.l_type = F_RDLCK;
-    else if (lockType == ltWrite) lock.l_type = F_WRLCK;
-    else if (lockType == ltNone) lock.l_type = F_UNLCK;
+    int type;
+    if (lockType == ltRead) type = LOCK_SH;
+    else if (lockType == ltWrite) type = LOCK_EX;
+    else if (lockType == ltNone) type = LOCK_UN;
    else abort();
-    lock.l_whence = SEEK_SET;
-    lock.l_start = 0;
-    lock.l_len = 0; /* entire file */

    if (wait) {
-        while (fcntl(fd, F_SETLKW, &lock) != 0) {
+        while (flock(fd, type) != 0) {
            checkInterrupt();
            if (errno != EINTR)
                throw SysError(format("acquiring/releasing lock"));
@ -58,9 +55,9 @@ bool lockFile(int fd, LockType lockType, bool wait)
                return false;
        }
    } else {
-        while (fcntl(fd, F_SETLK, &lock) != 0) {
+        while (flock(fd, type | LOCK_NB) != 0) {
            checkInterrupt();
-            if (errno == EACCES || errno == EAGAIN) return false;
+            if (errno == EWOULDBLOCK) return false;
            if (errno != EINTR)
                throw SysError(format("acquiring/releasing lock"));
        }
@ -70,14 +67,6 @@ bool lockFile(int fd, LockType lockType, bool wait)
 }


-/* This enables us to check whether are not already holding a lock on
-   a file ourselves.  POSIX locks (fcntl) suck in this respect: if we
-   close a descriptor, the previous lock will be closed as well.  And
-   there is no way to query whether we already have a lock (F_GETLK
-   only works on locks held by other processes). */
-static Sync<StringSet> lockedPaths_;
-
-
 PathLocks::PathLocks()
    : deletePaths(false)
 {
@ -91,7 +80,7 @@ PathLocks::PathLocks(const PathSet & paths, const string & waitMsg)
 }


-bool PathLocks::lockPaths(const PathSet & _paths,
+bool PathLocks::lockPaths(const PathSet & paths,
    const string & waitMsg, bool wait)
 {
    assert(fds.empty());
@ -99,75 +88,54 @@ bool PathLocks::lockPaths(const PathSet & _paths,
    /* Note that `fds' is built incrementally so that the destructor
       will only release those locks that we have already acquired. */

-    /* Sort the paths.  This assures that locks are always acquired in
-       the same order, thus preventing deadlocks. */
-    Paths paths(_paths.begin(), _paths.end());
-    paths.sort();
-
-    /* Acquire the lock for each path. */
+    /* Acquire the lock for each path in sorted order. This ensures
+       that locks are always acquired in the same order, thus
+       preventing deadlocks. */
    for (auto & path : paths) {
        checkInterrupt();
        Path lockPath = path + ".lock";

        debug(format("locking path '%1%'") % path);

-        {
-            auto lockedPaths(lockedPaths_.lock());
-            if (lockedPaths->count(lockPath)) {
-                if (!wait) return false;
-                throw AlreadyLocked("deadlock: trying to re-acquire self-held lock '%s'", lockPath);
-            }
-            lockedPaths->insert(lockPath);
-        }
+        AutoCloseFD fd;

-        try {
+        while (1) {

-            AutoCloseFD fd;
+            /* Open/create the lock file. */
+            fd = openLockFile(lockPath, true);

-            while (1) {
-
-                /* Open/create the lock file. */
-                fd = openLockFile(lockPath, true);
-
-                /* Acquire an exclusive lock. */
-                if (!lockFile(fd.get(), ltWrite, false)) {
-                    if (wait) {
-                        if (waitMsg != "") printError(waitMsg);
-                        lockFile(fd.get(), ltWrite, true);
-                    } else {
-                        /* Failed to lock this path; release all other
-                           locks. */
-                        unlock();
-                        lockedPaths_.lock()->erase(lockPath);
-                        return false;
-                    }
+            /* Acquire an exclusive lock. */
+            if (!lockFile(fd.get(), ltWrite, false)) {
+                if (wait) {
+                    if (waitMsg != "") printError(waitMsg);
+                    lockFile(fd.get(), ltWrite, true);
+                } else {
+                    /* Failed to lock this path; release all other
+                       locks. */
+                    unlock();
+                    return false;
                }
-
-                debug(format("lock acquired on '%1%'") % lockPath);
-
-                /* Check that the lock file hasn't become stale (i.e.,
-                   hasn't been unlinked). */
-                struct stat st;
-                if (fstat(fd.get(), &st) == -1)
-                    throw SysError(format("statting lock file '%1%'") % lockPath);
-                if (st.st_size != 0)
-                    /* This lock file has been unlinked, so we're holding
-                       a lock on a deleted file.  This means that other
-                       processes may create and acquire a lock on
-                       `lockPath', and proceed.  So we must retry. */
-                    debug(format("open lock file '%1%' has become stale") % lockPath);
-                else
-                    break;
            }

-            /* Use borrow so that the descriptor isn't closed. */
-            fds.push_back(FDPair(fd.release(), lockPath));
+            debug(format("lock acquired on '%1%'") % lockPath);

-        } catch (...) {
-            lockedPaths_.lock()->erase(lockPath);
-            throw;
+            /* Check that the lock file hasn't become stale (i.e.,
+               hasn't been unlinked). */
+            struct stat st;
+            if (fstat(fd.get(), &st) == -1)
+                throw SysError(format("statting lock file '%1%'") % lockPath);
+            if (st.st_size != 0)
+                /* This lock file has been unlinked, so we're holding
+                   a lock on a deleted file.  This means that other
+                   processes may create and acquire a lock on
+                   `lockPath', and proceed.  So we must retry. */
+                debug(format("open lock file '%1%' has become stale") % lockPath);
+            else
+                break;
        }

+        /* Use borrow so that the descriptor isn't closed. */
+        fds.push_back(FDPair(fd.release(), lockPath));
    }

    return true;
@ -189,8 +157,6 @@ void PathLocks::unlock()
    for (auto & i : fds) {
        if (deletePaths) deleteLockFile(i.second, i.first);

-        lockedPaths_.lock()->erase(i.second);
-
        if (close(i.first) == -1)
            printError(
                format("error (ignored): cannot close lock file on '%1%'") % i.second);
@ -208,11 +174,4 @@ void PathLocks::setDeletion(bool deletePaths)
 }


-bool pathIsLockedByMe(const Path & path)
-{
-    Path lockPath = path + ".lock";
-    return lockedPaths_.lock()->count(lockPath);
-}
-
-
 }
--- a/src/libstore/pathlocks.hh
+++ b/src/libstore/pathlocks.hh
@ -16,8 +16,6 @@ enum LockType { ltRead, ltWrite, ltNone };

 bool lockFile(int fd, LockType lockType, bool wait);

-MakeError(AlreadyLocked, Error);
-
 class PathLocks
 {
 private:
@ -37,6 +35,4 @@ public:
    void setDeletion(bool deletePaths);
 };

-bool pathIsLockedByMe(const Path & path);
-
 }