diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 5cbce0748..cb820e2d5 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -149,11 +149,12 @@ void LocalStore::addTempRoot(const StorePath & path) try { nix::connect(fdRootsSocket->get(), socketPath); } catch (SysError & e) { - /* The garbage collector may have exited, so we need to - restart. */ - if (e.errNo == ECONNREFUSED) { - debug("GC socket connection refused"); + /* The garbage collector may have exited or not + created the socket yet, so we need to restart. */ + if (e.errNo == ECONNREFUSED || e.errNo == ENOENT) { + debug("GC socket connection refused: %s", e.msg()); fdRootsSocket->close(); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); goto restart; } throw; @@ -509,6 +510,11 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) auto fdGCLock = openGCLock(); FdLock gcLock(fdGCLock.get(), ltWrite, true, "waiting for the big garbage collector lock..."); + /* Synchronisation point to test ENOENT handling in + addTempRoot(), see tests/gc-non-blocking.sh. */ + if (auto p = getEnv("_NIX_TEST_GC_SYNC_1")) + readFile(*p); + /* Start the server for receiving new roots. */ auto socketPath = stateDir.get() + gcSocketPath; createDirs(dirOf(socketPath)); @@ -632,6 +638,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) roots.insert(root.first); } + /* Synchronisation point for testing, see tests/functional/gc-non-blocking.sh. */ + if (auto p = getEnv("_NIX_TEST_GC_SYNC_2")) + readFile(*p); + /* Helper function that deletes a path from the store and throws GCLimitReached if we've deleted enough garbage. */ auto deleteFromStore = [&](std::string_view baseName) @@ -778,10 +788,6 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) } }; - /* Synchronisation point for testing, see tests/functional/gc-concurrent.sh. */ - if (auto p = getEnv("_NIX_TEST_GC_SYNC")) - readFile(*p); - /* Either delete all garbage paths, or just the specified paths (for gcDeleteSpecific). */ if (options.action == GCOptions::gcDeleteSpecific) { diff --git a/src/libutil/unix-domain-socket.cc b/src/libutil/unix-domain-socket.cc index 8949461d2..05bbb5ba3 100644 --- a/src/libutil/unix-domain-socket.cc +++ b/src/libutil/unix-domain-socket.cc @@ -1,6 +1,7 @@ #include "file-system.hh" #include "processes.hh" #include "unix-domain-socket.hh" +#include "util.hh" #include #include @@ -75,21 +76,35 @@ void connect(int fd, const std::string & path) addr.sun_family = AF_UNIX; if (path.size() + 1 >= sizeof(addr.sun_path)) { + Pipe pipe; + pipe.create(); Pid pid = startProcess([&]() { - Path dir = dirOf(path); - if (chdir(dir.c_str()) == -1) - throw SysError("chdir to '%s' failed", dir); - std::string base(baseNameOf(path)); - if (base.size() + 1 >= sizeof(addr.sun_path)) - throw Error("socket path '%s' is too long", base); - memcpy(addr.sun_path, base.c_str(), base.size() + 1); - if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) - throw SysError("cannot connect to socket at '%s'", path); - _exit(0); + try { + pipe.readSide.close(); + Path dir = dirOf(path); + if (chdir(dir.c_str()) == -1) + throw SysError("chdir to '%s' failed", dir); + std::string base(baseNameOf(path)); + if (base.size() + 1 >= sizeof(addr.sun_path)) + throw Error("socket path '%s' is too long", base); + memcpy(addr.sun_path, base.c_str(), base.size() + 1); + if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) + throw SysError("cannot connect to socket at '%s'", path); + writeFull(pipe.writeSide.get(), "0\n"); + } catch (SysError & e) { + writeFull(pipe.writeSide.get(), fmt("%d\n", e.errNo)); + } catch (...) { + writeFull(pipe.writeSide.get(), "-1\n"); + } }); - int status = pid.wait(); - if (status != 0) + pipe.writeSide.close(); + auto errNo = string2Int(chomp(drainFD(pipe.readSide.get()))); + if (!errNo || *errNo == -1) throw Error("cannot connect to socket at '%s'", path); + else if (*errNo > 0) { + errno = *errNo; + throw SysError("cannot connect to socket at '%s'", path); + } } else { memcpy(addr.sun_path, path.c_str(), path.size() + 1); if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) diff --git a/tests/functional/gc-non-blocking.sh b/tests/functional/gc-non-blocking.sh index 0d781485d..ec280badb 100644 --- a/tests/functional/gc-non-blocking.sh +++ b/tests/functional/gc-non-blocking.sh @@ -6,27 +6,42 @@ needLocalStore "the GC test needs a synchronisation point" clearStore -fifo=$TEST_ROOT/test.fifo -mkfifo "$fifo" +# This FIFO is read just after the global GC lock has been acquired, +# but before the root server is started. +fifo1=$TEST_ROOT/test2.fifo +mkfifo "$fifo1" + +# This FIFO is read just after the roots have been read, but before +# the actual GC starts. +fifo2=$TEST_ROOT/test.fifo +mkfifo "$fifo2" dummy=$(nix store add-path ./simple.nix) running=$TEST_ROOT/running touch $running -(_NIX_TEST_GC_SYNC=$fifo nix-store --gc -vvvvv; rm $running) & +# Start GC. +(_NIX_TEST_GC_SYNC_1=$fifo1 _NIX_TEST_GC_SYNC_2=$fifo2 nix-store --gc -vvvvv; rm $running) & pid=$! sleep 2 +# Delay the start of the root server to check that the build below +# correctly handles ENOENT when connecting to the root server. +(sleep 1; echo > $fifo1) & +pid2=$! + +# Start a build. This should not be blocked by the GC in progress. outPath=$(nix-build --max-silent-time 60 -o "$TEST_ROOT/result" -E " with import ./config.nix; mkDerivation { name = \"non-blocking\"; - buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo\"; + buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo2\"; }") wait $pid +wait $pid2 (! test -e $running) (! test -e $dummy)