nix run: Fix chroot execution

Running "nix run" with a diverted store, e.g.

  $ nix run --store local?root=/tmp/nix nixpkgs.hello

stopped working when Nix became multithreaded, because
unshare(CLONE_NEWUSER) doesn't work in multithreaded processes. The
obvious solution is to terminate all other threads first, but 1) there
is no way to terminate Boehm GC marker threads; and 2) it appears that
the kernel has a race where unshare(CLONE_NEWUSER) will still fail for
some indeterminate amount of time after joining other threads.

So instead, "nix run" will now exec() a single-threaded helper ("nix
__run_in_chroot") that performs the actual unshare()/chroot()/exec().
This commit is contained in:
Eelco Dolstra 2017-08-29 13:21:07 +02:00
parent 1c58e13bee
commit 93a5ef0516
No known key found for this signature in database
GPG key ID: 8170B4726D7198DE
2 changed files with 110 additions and 60 deletions

View file

@ -9,6 +9,10 @@
#include "store-api.hh"
#include "progress-bar.hh"
extern std::string chrootHelperName;
void chrootHelper(int argc, char * * argv);
namespace nix {
struct NixArgs : virtual MultiCommand, virtual MixCommonArgs
@ -57,6 +61,13 @@ void mainWrapped(int argc, char * * argv)
verbosity = lvlError;
settings.verboseBuild = false;
/* The chroot helper needs to be run before any threads have been
started. */
if (argc > 0 && argv[0] == chrootHelperName) {
chrootHelper(argc, argv);
return;
}
initNix();
initGC();

View file

@ -5,6 +5,7 @@
#include "derivations.hh"
#include "local-store.hh"
#include "finally.hh"
#include "fs-accessor.hh"
#if __linux__
#include <sys/mount.h>
@ -12,6 +13,8 @@
using namespace nix;
std::string chrootHelperName = "__run_in_chroot";
struct CmdRun : InstallablesCommand
{
CmdRun()
@ -32,73 +35,109 @@ struct CmdRun : InstallablesCommand
{
auto outPaths = toStorePaths(store, Build);
auto store2 = store.dynamic_pointer_cast<LocalStore>();
if (store2 && store->storeDir != store2->realStoreDir) {
#if __linux__
uid_t uid = getuid();
uid_t gid = getgid();
if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == -1)
throw SysError("setting up a private mount namespace");
/* Bind-mount realStoreDir on /nix/store. If the latter
mount point doesn't already exists, we have to create a
chroot environment containing the mount point and bind
mounts for the children of /. Would be nice if we could
use overlayfs here, but that doesn't work in a user
namespace yet (Ubuntu has a patch for this:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1478578). */
if (!pathExists(store->storeDir)) {
// FIXME: Use overlayfs?
Path tmpDir = createTempDir();
createDirs(tmpDir + store->storeDir);
if (mount(store2->realStoreDir.c_str(), (tmpDir + store->storeDir).c_str(), "", MS_BIND, 0) == -1)
throw SysError(format("mounting '%s' on '%s'") % store2->realStoreDir % store->storeDir);
for (auto entry : readDirectory("/")) {
Path dst = tmpDir + "/" + entry.name;
if (pathExists(dst)) continue;
if (mkdir(dst.c_str(), 0700) == -1)
throw SysError(format("creating directory '%s'") % dst);
if (mount(("/" + entry.name).c_str(), dst.c_str(), "", MS_BIND | MS_REC, 0) == -1)
throw SysError(format("mounting '%s' on '%s'") % ("/" + entry.name) % dst);
}
char * cwd = getcwd(0, 0);
if (!cwd) throw SysError("getting current directory");
Finally freeCwd([&]() { free(cwd); });
if (chroot(tmpDir.c_str()) == -1)
throw SysError(format("chrooting into '%s'") % tmpDir);
if (chdir(cwd) == -1)
throw SysError(format("chdir to '%s' in chroot") % cwd);
} else
if (mount(store2->realStoreDir.c_str(), store->storeDir.c_str(), "", MS_BIND, 0) == -1)
throw SysError(format("mounting '%s' on '%s'") % store2->realStoreDir % store->storeDir);
writeFile("/proc/self/setgroups", "deny");
writeFile("/proc/self/uid_map", (format("%d %d %d") % uid % uid % 1).str());
writeFile("/proc/self/gid_map", (format("%d %d %d") % gid % gid % 1).str());
#else
throw Error(format("mounting the Nix store on '%s' is not supported on this platform") % store->storeDir);
#endif
}
auto accessor = store->getFSAccessor();
auto unixPath = tokenizeString<Strings>(getEnv("PATH"), ":");
for (auto & path : outPaths)
if (pathExists(path + "/bin"))
if (accessor->stat(path + "/bin").type != FSAccessor::tMissing)
unixPath.push_front(path + "/bin");
setenv("PATH", concatStringsSep(":", unixPath).c_str(), 1);
if (execlp("bash", "bash", nullptr) == -1)
throw SysError("unable to exec 'bash'");
std::string cmd = "bash";
Strings args = { cmd };
/* If this is a diverted store (i.e. its "logical" location
(typically /nix/store) differs from its "physical" location
(e.g. /home/eelco/nix/store), then run the command in a
chroot. For non-root users, this requires running it in new
mount and user namespaces. Unfortunately,
unshare(CLONE_NEWUSER) doesn't work in a multithreaded
program (which "nix" is), so we exec() a single-threaded
helper program (chrootHelper() below) to do the work. */
auto store2 = store.dynamic_pointer_cast<LocalStore>();
if (store2 && store->storeDir != store2->realStoreDir) {
Strings helperArgs = { chrootHelperName, store->storeDir, store2->realStoreDir, cmd };
for (auto & arg : args) helperArgs.push_back(arg);
execv(readLink("/proc/self/exe").c_str(), stringsToCharPtrs(helperArgs).data());
throw SysError("could not execute chroot helper");
}
execvp(cmd.c_str(), stringsToCharPtrs(args).data());
throw SysError("unable to exec '%s'", cmd);
}
};
static RegisterCommand r1(make_ref<CmdRun>());
void chrootHelper(int argc, char * * argv)
{
int p = 1;
std::string storeDir = argv[p++];
std::string realStoreDir = argv[p++];
std::string cmd = argv[p++];
Strings args;
while (p < argc)
args.push_back(argv[p++]);
#if __linux__
uid_t uid = getuid();
uid_t gid = getgid();
if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == -1)
throw SysError("setting up a private mount namespace");
/* Bind-mount realStoreDir on /nix/store. If the latter mount
point doesn't already exists, we have to create a chroot
environment containing the mount point and bind mounts for the
children of /. Would be nice if we could use overlayfs here,
but that doesn't work in a user namespace yet (Ubuntu has a
patch for this:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1478578). */
if (true /* !pathExists(storeDir) */) {
// FIXME: Use overlayfs?
Path tmpDir = createTempDir();
createDirs(tmpDir + storeDir);
if (mount(realStoreDir.c_str(), (tmpDir + storeDir).c_str(), "", MS_BIND, 0) == -1)
throw SysError("mounting '%s' on '%s'", realStoreDir, storeDir);
for (auto entry : readDirectory("/")) {
Path dst = tmpDir + "/" + entry.name;
if (pathExists(dst)) continue;
if (mkdir(dst.c_str(), 0700) == -1)
throw SysError(format("creating directory '%s'") % dst);
if (mount(("/" + entry.name).c_str(), dst.c_str(), "", MS_BIND | MS_REC, 0) == -1)
throw SysError(format("mounting '%s' on '%s'") % ("/" + entry.name) % dst);
}
char * cwd = getcwd(0, 0);
if (!cwd) throw SysError("getting current directory");
Finally freeCwd([&]() { free(cwd); });
if (chroot(tmpDir.c_str()) == -1)
throw SysError(format("chrooting into '%s'") % tmpDir);
if (chdir(cwd) == -1)
throw SysError(format("chdir to '%s' in chroot") % cwd);
} else
if (mount(realStoreDir.c_str(), storeDir.c_str(), "", MS_BIND, 0) == -1)
throw SysError("mounting '%s' on '%s'", realStoreDir, storeDir);
writeFile("/proc/self/setgroups", "deny");
writeFile("/proc/self/uid_map", fmt("%d %d %d", uid, uid, 1));
writeFile("/proc/self/gid_map", fmt("%d %d %d", gid, gid, 1));
execvp(cmd.c_str(), stringsToCharPtrs(args).data());
throw SysError("unable to exec '%s'", cmd);
#else
throw Error("mounting the Nix store on '%s' is not supported on this platform", >storeDir);
#endif
}