Merge pull request #3600 from NixOS/auto-uid-allocation

Automatic UID allocation
This commit is contained in:
Eelco Dolstra 2022-11-29 14:01:42 +01:00 committed by GitHub
commit fbc53e97ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 859 additions and 203 deletions

View file

@ -10,3 +10,45 @@
This avoids a lot of spurious errors where some benign strings end up having
a context just because they are read from a store path
([#7260](https://github.com/NixOS/nix/pull/7260)).
* Nix can now automatically pick UIDs for builds, removing the need to
create `nixbld*` user accounts. These UIDs are allocated starting at
872415232 (0x34000000) on Linux and 56930 on macOS.
This is an experimental feature. To enable it, add the following to
`nix.conf`:
```
extra-experimental-features = auto-allocate-uids
auto-allocate-uids = true
```
* On Linux, Nix can now run builds in a user namespace where the build
runs as root (UID 0) and has 65,536 UIDs available. This is
primarily useful for running containers such as `systemd-nspawn`
inside a Nix build. For an example, see
https://github.com/NixOS/nix/blob/67bcb99700a0da1395fa063d7c6586740b304598/tests/systemd-nspawn.nix.
A build can enable this by requiring the `uid-range` system feature,
i.e. by setting the derivation attribute
```
requiredSystemFeatures = [ "uid-range" ];
```
The `uid-range` system feature requires the `auto-allocate-uids`
setting to be enabled (see above).
* On Linux, Nix has experimental support for running builds inside a
cgroup. It can be enabled by adding
```
extra-experimental-features = cgroups
use-cgroups = true
```
to `nix.conf`. Cgroups are required for derivations that require the
`uid-range` system feature.
* `nix build --json` now prints some statistics about top-level
derivations, such as CPU statistics when cgroups are enabled.

View file

@ -506,6 +506,12 @@
overlay = self.overlays.default;
});
tests.containers = (import ./tests/containers.nix rec {
system = "x86_64-linux";
inherit nixpkgs;
overlay = self.overlays.default;
});
tests.setuid = nixpkgs.lib.genAttrs
["i686-linux" "x86_64-linux"]
(system:

View file

@ -931,7 +931,7 @@ std::vector<std::pair<std::shared_ptr<Installable>, BuiltPathWithResult>> Instal
case Realise::Outputs: {
if (settings.printMissing)
printMissing(store, pathsToBuild, lvlInfo);
printMissing(store, pathsToBuild, lvlInfo);
for (auto & buildResult : store->buildPathsWithResults(pathsToBuild, bMode, evalStore)) {
if (!buildResult.success())

View file

@ -5,7 +5,7 @@
#include <string>
#include <chrono>
#include <optional>
namespace nix {
@ -78,6 +78,9 @@ struct BuildResult
was repeated). */
time_t startTime = 0, stopTime = 0;
/* User and system CPU time the build took. */
std::optional<std::chrono::microseconds> cpuUser, cpuSystem;
bool success()
{
return status == Built || status == Substituted || status == AlreadyValid || status == ResolvesToAlreadyValid;

View file

@ -886,6 +886,14 @@ void DerivationGoal::buildDone()
cleanupPostChildKill();
if (buildResult.cpuUser && buildResult.cpuSystem) {
debug("builder for '%s' terminated with status %d, user CPU %.3fs, system CPU %.3fs",
worker.store.printStorePath(drvPath),
status,
((double) buildResult.cpuUser->count()) / 1000000,
((double) buildResult.cpuSystem->count()) / 1000000);
}
bool diskFull = false;
try {

View file

@ -14,6 +14,7 @@
#include "topo-sort.hh"
#include "callback.hh"
#include "json-utils.hh"
#include "cgroup.hh"
#include <regex>
#include <queue>
@ -129,26 +130,44 @@ void LocalDerivationGoal::killChild()
if (pid != -1) {
worker.childTerminated(this);
if (buildUser) {
/* If we're using a build user, then there is a tricky
race condition: if we kill the build user before the
child has done its setuid() to the build user uid, then
it won't be killed, and we'll potentially lock up in
pid.wait(). So also send a conventional kill to the
child. */
::kill(-pid, SIGKILL); /* ignore the result */
buildUser->kill();
pid.wait();
} else
pid.kill();
/* If we're using a build user, then there is a tricky race
condition: if we kill the build user before the child has
done its setuid() to the build user uid, then it won't be
killed, and we'll potentially lock up in pid.wait(). So
also send a conventional kill to the child. */
::kill(-pid, SIGKILL); /* ignore the result */
assert(pid == -1);
killSandbox(true);
pid.wait();
}
DerivationGoal::killChild();
}
/* Kill whatever may still be running inside the build sandbox.

   If the build has a cgroup, the cgroup is destroyed; when 'getStats'
   is true the CPU statistics returned by destroyCgroup() are stored in
   'buildResult'. If there is no cgroup but there is a build user, all
   processes of that user's UID are killed instead. With neither, this
   is a no-op. */
void LocalDerivationGoal::killSandbox(bool getStats)
{
    if (!cgroup) {
        /* No cgroup: fall back to killing by UID, if we have one. */
        if (buildUser) {
            auto builderUid = buildUser->getUID();
            assert(builderUid != 0);
            killUser(builderUid);
        }
        return;
    }

#if __linux__
    auto cgroupStats = destroyCgroup(*cgroup);
    if (getStats) {
        buildResult.cpuUser = cgroupStats.cpuUser;
        buildResult.cpuSystem = cgroupStats.cpuSystem;
    }
#else
    /* A cgroup can only have been set up on Linux. */
    abort();
#endif
}
void LocalDerivationGoal::tryLocalBuild() {
unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) {
@ -158,28 +177,46 @@ void LocalDerivationGoal::tryLocalBuild() {
return;
}
/* If `build-users-group' is not empty, then we have to build as
one of the members of that group. */
if (settings.buildUsersGroup != "" && getuid() == 0) {
#if defined(__linux__) || defined(__APPLE__)
if (!buildUser) buildUser = std::make_unique<UserLock>();
/* Are we doing a chroot build? */
{
auto noChroot = parsedDrv->getBoolAttr("__noChroot");
if (settings.sandboxMode == smEnabled) {
if (noChroot)
throw Error("derivation '%s' has '__noChroot' set, "
"but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath));
#if __APPLE__
if (additionalSandboxProfile != "")
throw Error("derivation '%s' specifies a sandbox profile, "
"but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath));
#endif
useChroot = true;
}
else if (settings.sandboxMode == smDisabled)
useChroot = false;
else if (settings.sandboxMode == smRelaxed)
useChroot = derivationType.isSandboxed() && !noChroot;
}
if (buildUser->findFreeUser()) {
/* Make sure that no other processes are executing under this
uid. */
buildUser->kill();
} else {
auto & localStore = getLocalStore();
if (localStore.storeDir != localStore.realStoreDir.get()) {
#if __linux__
useChroot = true;
#else
throw Error("building using a diverted store is not supported on this platform");
#endif
}
if (useBuildUsers()) {
if (!buildUser)
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot);
if (!buildUser) {
if (!actLock)
actLock = std::make_unique<Activity>(*logger, lvlWarn, actBuildWaiting,
fmt("waiting for UID to build '%s'", yellowtxt(worker.store.printStorePath(drvPath))));
worker.waitForAWhile(shared_from_this());
return;
}
#else
/* Don't know how to block the creation of setuid/setgid
binaries on this platform. */
throw Error("build users are not supported on this platform for security reasons");
#endif
}
actLock.reset();
@ -270,7 +307,7 @@ void LocalDerivationGoal::cleanupPostChildKill()
malicious user from leaving behind a process that keeps files
open and modifies them after they have been chown'ed to
root. */
if (buildUser) buildUser->kill();
killSandbox(true);
/* Terminate the recursive Nix daemon. */
stopDaemon();
@ -363,6 +400,60 @@ static void linkOrCopy(const Path & from, const Path & to)
void LocalDerivationGoal::startBuilder()
{
if ((buildUser && buildUser->getUIDCount() != 1)
#if __linux__
|| settings.useCgroups
#endif
)
{
#if __linux__
settings.requireExperimentalFeature(Xp::Cgroups);
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto ourCgroup = ourCgroups[""];
if (ourCgroup == "")
throw Error("cannot determine cgroup name from /proc/self/cgroup");
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
if (!pathExists(ourCgroupPath))
throw Error("expected cgroup directory '%s'", ourCgroupPath);
static std::atomic<unsigned int> counter{0};
cgroup = buildUser
? fmt("%s/nix-build-uid-%d", ourCgroupPath, buildUser->getUID())
: fmt("%s/nix-build-pid-%d-%d", ourCgroupPath, getpid(), counter++);
debug("using cgroup '%s'", *cgroup);
/* When using a build user, record the cgroup we used for that
user so that if we got interrupted previously, we can kill
any left-over cgroup first. */
if (buildUser) {
auto cgroupsDir = settings.nixStateDir + "/cgroups";
createDirs(cgroupsDir);
auto cgroupFile = fmt("%s/%d", cgroupsDir, buildUser->getUID());
if (pathExists(cgroupFile)) {
auto prevCgroup = readFile(cgroupFile);
destroyCgroup(prevCgroup);
}
writeFile(cgroupFile, *cgroup);
}
#else
throw Error("cgroups are not supported on this platform");
#endif
}
/* Make sure that no other processes are executing under the
sandbox uids. This must be done before any chownToBuilder()
calls. */
killSandbox(false);
/* Right platform? */
if (!parsedDrv->canBuildLocally(worker.store))
throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}",
@ -376,35 +467,6 @@ void LocalDerivationGoal::startBuilder()
additionalSandboxProfile = parsedDrv->getStringAttr("__sandboxProfile").value_or("");
#endif
/* Are we doing a chroot build? */
{
auto noChroot = parsedDrv->getBoolAttr("__noChroot");
if (settings.sandboxMode == smEnabled) {
if (noChroot)
throw Error("derivation '%s' has '__noChroot' set, "
"but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath));
#if __APPLE__
if (additionalSandboxProfile != "")
throw Error("derivation '%s' specifies a sandbox profile, "
"but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath));
#endif
useChroot = true;
}
else if (settings.sandboxMode == smDisabled)
useChroot = false;
else if (settings.sandboxMode == smRelaxed)
useChroot = derivationType.isSandboxed() && !noChroot;
}
auto & localStore = getLocalStore();
if (localStore.storeDir != localStore.realStoreDir.get()) {
#if __linux__
useChroot = true;
#else
throw Error("building using a diverted store is not supported on this platform");
#endif
}
/* Create a temporary directory where the build will take
place. */
tmpDir = createTempDir("", "nix-build-" + std::string(drvPath.name()), false, false, 0700);
@ -580,10 +642,11 @@ void LocalDerivationGoal::startBuilder()
printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir);
if (mkdir(chrootRootDir.c_str(), 0750) == -1)
// FIXME: make this 0700
if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1)
throw SysError("cannot create '%1%'", chrootRootDir);
if (buildUser && chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1)
if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUIDCount() != 1 ? buildUser->getUID() : 0, buildUser->getGID()) == -1)
throw SysError("cannot change ownership of '%1%'", chrootRootDir);
/* Create a writable /tmp in the chroot. Many builders need
@ -597,6 +660,10 @@ void LocalDerivationGoal::startBuilder()
nobody account. The latter is kind of a hack to support
Samba-in-QEMU. */
createDirs(chrootRootDir + "/etc");
chownToBuilder(chrootRootDir + "/etc");
if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536))
throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name);
/* Declare the build user's group so that programs get a consistent
view of the system (e.g., "id -gn"). */
@ -647,12 +714,28 @@ void LocalDerivationGoal::startBuilder()
dirsInChroot.erase(worker.store.printStorePath(*i.second.second));
}
#elif __APPLE__
/* We don't really have any parent prep work to do (yet?)
All work happens in the child, instead. */
if (cgroup) {
if (mkdir(cgroup->c_str(), 0755) != 0)
throw SysError("creating cgroup '%s'", *cgroup);
chownToBuilder(*cgroup);
chownToBuilder(*cgroup + "/cgroup.procs");
chownToBuilder(*cgroup + "/cgroup.threads");
//chownToBuilder(*cgroup + "/cgroup.subtree_control");
}
#else
throw Error("sandboxing builds is not supported on this platform");
if (parsedDrv->useUidRange())
throw Error("feature 'uid-range' is not supported on this platform");
#if __APPLE__
/* We don't really have any parent prep work to do (yet?)
All work happens in the child, instead. */
#else
throw Error("sandboxing builds is not supported on this platform");
#endif
#endif
} else {
if (parsedDrv->useUidRange())
throw Error("feature 'uid-range' is only supported in sandboxed builds");
}
if (needsHashRewrite() && pathExists(homeDir))
@ -913,14 +996,16 @@ void LocalDerivationGoal::startBuilder()
the calling user (if build users are disabled). */
uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
writeFile("/proc/" + std::to_string(pid) + "/uid_map",
fmt("%d %d 1", sandboxUid(), hostUid));
fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
if (!buildUser || buildUser->getUIDCount() == 1)
writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
writeFile("/proc/" + std::to_string(pid) + "/gid_map",
fmt("%d %d 1", sandboxGid(), hostGid));
fmt("%d %d %d", sandboxGid(), hostGid, nrIds));
} else {
debug("note: not using a user namespace");
if (!buildUser)
@ -947,6 +1032,10 @@ void LocalDerivationGoal::startBuilder()
throw SysError("getting sandbox user namespace");
}
/* Move the child into its own cgroup. */
if (cgroup)
writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
/* Signal the builder that we've updated its user namespace. */
writeFull(userNamespaceSync.writeSide.get(), "1");
@ -1779,6 +1868,13 @@ void LocalDerivationGoal::runChild()
if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1)
throw SysError("mounting /proc");
/* Mount sysfs on /sys. */
if (buildUser && buildUser->getUIDCount() != 1) {
createDirs(chrootRootDir + "/sys");
if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1)
throw SysError("mounting /sys");
}
/* Mount a new tmpfs on /dev/shm to ensure that whatever
the builder puts in /dev/shm is cleaned up automatically. */
if (pathExists("/dev/shm") && mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0,
@ -1821,6 +1917,12 @@ void LocalDerivationGoal::runChild()
if (unshare(CLONE_NEWNS) == -1)
throw SysError("unsharing mount namespace");
/* Unshare the cgroup namespace. This means
/proc/self/cgroup will show the child's cgroup as '/'
rather than whatever it is in the parent. */
if (cgroup && unshare(CLONE_NEWCGROUP) == -1)
throw SysError("unsharing cgroup namespace");
/* Do the chroot(). */
if (chdir(chrootRootDir.c_str()) == -1)
throw SysError("cannot change directory to '%1%'", chrootRootDir);
@ -1906,9 +2008,8 @@ void LocalDerivationGoal::runChild()
if (setUser && buildUser) {
/* Preserve supplementary groups of the build user, to allow
admins to specify groups such as "kvm". */
if (!buildUser->getSupplementaryGIDs().empty() &&
setgroups(buildUser->getSupplementaryGIDs().size(),
buildUser->getSupplementaryGIDs().data()) == -1)
auto gids = buildUser->getSupplementaryGIDs();
if (setgroups(gids.size(), gids.data()) == -1)
throw SysError("cannot set supplementary groups of build user");
if (setgid(buildUser->getGID()) == -1 ||
@ -2237,7 +2338,10 @@ DrvOutputs LocalDerivationGoal::registerOutputs()
/* Canonicalise first. This ensures that the path we're
rewriting doesn't contain a hard link to /etc/shadow or
something like that. */
canonicalisePathMetaData(actualPath, buildUser ? buildUser->getUID() : -1, inodesSeen);
canonicalisePathMetaData(
actualPath,
buildUser ? std::optional(buildUser->getUIDRange()) : std::nullopt,
inodesSeen);
debug("scanning for references for output '%s' in temp location '%s'", outputName, actualPath);
@ -2330,6 +2434,10 @@ DrvOutputs LocalDerivationGoal::registerOutputs()
sink.s = rewriteStrings(sink.s, outputRewrites);
StringSource source(sink.s);
restorePath(actualPath, source);
/* FIXME: set proper permissions in restorePath() so
we don't have to do another traversal. */
canonicalisePathMetaData(actualPath, {}, inodesSeen);
}
};
@ -2492,7 +2600,7 @@ DrvOutputs LocalDerivationGoal::registerOutputs()
/* FIXME: set proper permissions in restorePath() so
we don't have to do another traversal. */
canonicalisePathMetaData(actualPath, -1, inodesSeen);
canonicalisePathMetaData(actualPath, {}, inodesSeen);
/* Calculate where we'll move the output files. In the checking case we
will leave them where they are, for now, rather than move to

View file

@ -15,6 +15,9 @@ struct LocalDerivationGoal : public DerivationGoal
/* The process ID of the builder. */
Pid pid;
/* The cgroup of the builder, if any. */
std::optional<Path> cgroup;
/* The temporary directory. */
Path tmpDir;
@ -92,8 +95,8 @@ struct LocalDerivationGoal : public DerivationGoal
result. */
std::map<Path, ValidPathInfo> prevInfos;
uid_t sandboxUid() { return usingUserNamespace ? 1000 : buildUser->getUID(); }
gid_t sandboxGid() { return usingUserNamespace ? 100 : buildUser->getGID(); }
uid_t sandboxUid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); }
gid_t sandboxGid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); }
const static Path homeDir;
@ -197,6 +200,10 @@ struct LocalDerivationGoal : public DerivationGoal
/* Forcibly kill the child process, if any. */
void killChild() override;
/* Kill any processes running under the build user UID or in the
cgroup of the build. */
void killSandbox(bool getStats);
/* Create alternative path calculated from but distinct from the
input, so we can avoid overwriting outputs (or other store paths)
that already exist. */

131
src/libstore/cgroup.cc Normal file
View file

@ -0,0 +1,131 @@
#if __linux__
#include "cgroup.hh"
#include "util.hh"
#include <chrono>
#include <cmath>
#include <regex>
#include <unordered_set>
#include <thread>
#include <dirent.h>
namespace nix {
// FIXME: obsolete, check for cgroup2
/* Parse 'cgroupFile', whose lines must have the form
   '<number>:<name>:<path>', into a map from <name> to <path>. A
   leading 'name=' is stripped from <name>. If a name occurs more than
   once, the last line wins. Throws Error on a line that does not
   match the expected form. */
std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
{
    std::map<std::string, std::string> result;

    auto lines = tokenizeString<std::vector<std::string>>(readFile(cgroupFile), "\n");

    static std::regex lineRegex("([0-9]+):([^:]*):(.*)");

    for (auto & line : lines) {
        std::smatch m;
        if (!std::regex_match(line, m, lineRegex))
            throw Error("invalid line '%s' in '%s'", line, cgroupFile);

        std::string controller = m[2].str();
        if (hasPrefix(controller, "name="))
            controller = controller.substr(5);

        result.insert_or_assign(controller, m[3].str());
    }

    return result;
}
/* Kill every process in the cgroup directory 'cgroup' (after
   recursively destroying its sub-cgroups) and then remove the
   directory.

   Returns an empty CgroupStats if 'cgroup' does not exist. If
   'returnStats' is true, the 'user_usec' and 'system_usec' values
   found in the cgroup's 'cpu.stat' file (if that file exists) are
   returned; otherwise the returned stats are left unset.

   Throws Error if 'cgroup' has no 'cgroup.procs' file, if an entry of
   'cgroup.procs' is not a valid PID, or if the cgroup is still not
   empty after 20 kill rounds. Throws SysError if kill() fails with
   anything other than ESRCH, or if the final rmdir() fails. */
static CgroupStats destroyCgroup(const Path & cgroup, bool returnStats)
{
    if (!pathExists(cgroup)) return {};

    auto procsFile = cgroup + "/cgroup.procs";

    if (!pathExists(procsFile))
        throw Error("'%s' is not a cgroup", cgroup);

    /* Use the fast way to kill every process in a cgroup, if
       available. */
    auto killFile = cgroup + "/cgroup.kill";
    if (pathExists(killFile))
        writeFile(killFile, "1");

    /* Regardless of whether 'cgroup.kill' was available, destroy the
       sub-cgroups and then manually kill whatever is still listed in
       this cgroup until it is empty. */
    for (auto & entry : readDirectory(cgroup)) {
        if (entry.type != DT_DIR) continue;
        destroyCgroup(cgroup + "/" + entry.name, false);
    }

    int round = 1;

    /* PIDs we have already warned about, so each stray process is
       reported only once across rounds. */
    std::unordered_set<pid_t> pidsShown;

    while (true) {
        auto pids = tokenizeString<std::vector<std::string>>(readFile(procsFile));

        if (pids.empty()) break;

        if (round > 20)
            throw Error("cannot kill cgroup '%s'", cgroup);

        for (auto & pid_s : pids) {
            /* Report the offending token itself; there is no parsed
               PID to show when parsing fails. */
            auto parsedPid = string2Int<pid_t>(pid_s);
            if (!parsedPid)
                throw Error("invalid pid '%s'", pid_s);
            pid_t pid = *parsedPid;

            if (pidsShown.insert(pid).second) {
                try {
                    auto cmdline = readFile(fmt("/proc/%d/cmdline", pid));
                    using namespace std::string_literals;
                    warn("killing stray builder process %d (%s)...",
                        pid, trim(replaceStrings(cmdline, "\0"s, " ")));
                } catch (SysError &) {
                    /* The process may already be gone; the warning is
                       best-effort only. */
                }
            }

            // FIXME: pid wraparound
            if (kill(pid, SIGKILL) == -1 && errno != ESRCH)
                throw SysError("killing member %d of cgroup '%s'", pid, cgroup);
        }

        /* Exponential back-off: 2^round ms, capped at 2^10 ms. */
        auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10)));
        if (sleep.count() > 100)
            printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup);
        std::this_thread::sleep_for(sleep);
        round++;
    }

    CgroupStats stats;

    if (returnStats) {
        /* The statistics must be read before the directory is removed. */
        auto cpustatPath = cgroup + "/cpu.stat";

        if (pathExists(cpustatPath)) {
            for (auto & line : tokenizeString<std::vector<std::string>>(readFile(cpustatPath), "\n")) {

                std::string_view userPrefix = "user_usec ";
                if (hasPrefix(line, userPrefix)) {
                    auto n = string2Int<uint64_t>(line.substr(userPrefix.size()));
                    if (n) stats.cpuUser = std::chrono::microseconds(*n);
                }

                std::string_view systemPrefix = "system_usec ";
                if (hasPrefix(line, systemPrefix)) {
                    auto n = string2Int<uint64_t>(line.substr(systemPrefix.size()));
                    if (n) stats.cpuSystem = std::chrono::microseconds(*n);
                }
            }
        }
    }

    if (rmdir(cgroup.c_str()) == -1)
        throw SysError("deleting cgroup '%s'", cgroup);

    return stats;
}
/* Public entry point: destroy 'cgroup' and always collect its
   statistics on the way out. */
CgroupStats destroyCgroup(const Path & cgroup)
{
    constexpr bool collectStats = true;
    return destroyCgroup(cgroup, collectStats);
}
}
#endif

27
src/libstore/cgroup.hh Normal file
View file

@ -0,0 +1,27 @@
#pragma once
#if __linux__
#include <chrono>
#include <optional>
#include "types.hh"
namespace nix {
/* Parse 'cgroupFile', whose lines have the form
   '<number>:<name>:<path>', and return a map from <name> (with any
   leading 'name=' removed) to <path>. Throws an Error if a line does
   not have that form. */
std::map<std::string, std::string> getCgroups(const Path & cgroupFile);
/* Statistics collected from a cgroup when it is destroyed. */
struct CgroupStats
{
/* User and system CPU time, taken from the 'user_usec' and
   'system_usec' lines of the cgroup's 'cpu.stat' file. Left unset
   when the value could not be read. */
std::optional<std::chrono::microseconds> cpuUser, cpuSystem;
};
/* Destroy the cgroup denoted by 'path'. The postcondition is that
'path' does not exist, and thus any processes in the cgroup have
been killed. Also return statistics from the cgroup just before
destruction. */
CgroupStats destroyCgroup(const Path & cgroup);
}
#endif

View file

@ -130,6 +130,10 @@ StringSet Settings::getDefaultSystemFeatures()
actually require anything special on the machines. */
StringSet features{"nixos-test", "benchmark", "big-parallel"};
#if __linux__
features.insert("uid-range");
#endif
#if __linux__
if (access("/dev/kvm", R_OK | W_OK) == 0)
features.insert("kvm");

View file

@ -46,6 +46,14 @@ struct PluginFilesSetting : public BaseSetting<Paths>
void set(const std::string & str, bool append = false) override;
};
/* Maximum number of UIDs/GIDs that a single build can be given:
   65536 (1 << 16) on Linux, 1 on every other platform. */
const uint32_t maxIdsPerBuild =
#if __linux__
1 << 16
#else
1
#endif
;
class Settings : public Config {
unsigned int getDefaultCores();
@ -275,6 +283,38 @@ public:
multi-user settings with untrusted users.
)"};
Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids",
"Whether to allocate UIDs for builders automatically."};
Setting<uint32_t> startId{this,
#if __linux__
0x34000000,
#else
56930,
#endif
"start-id",
"The first UID and GID to use for dynamic ID allocation."};
Setting<uint32_t> uidCount{this,
#if __linux__
maxIdsPerBuild * 128,
#else
128,
#endif
"id-count",
"The number of UIDs/GIDs to use for dynamic ID allocation."};
#if __linux__
Setting<bool> useCgroups{
this, false, "use-cgroups",
R"(
Whether to execute builds inside cgroups. Cgroups are
enabled automatically for derivations that require the
`uid-range` system feature.
)"
};
#endif
Setting<bool> impersonateLinux26{this, false, "impersonate-linux-26",
"Whether to impersonate a Linux 2.6 machine on newer kernels.",
{"build-impersonate-linux-26"}};
@ -563,10 +603,10 @@ public:
cache) must have a signature by a trusted key. A trusted key is one
listed in `trusted-public-keys`, or a public key counterpart to a
private key stored in a file listed in `secret-key-files`.
Set to `false` to disable signature checking and trust all
non-content-addressed paths unconditionally.
(Content-addressed paths are inherently trustworthy and thus
unaffected by this configuration option.)
)"};

View file

@ -583,7 +583,10 @@ void canonicaliseTimestampAndPermissions(const Path & path)
}
static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSeen & inodesSeen)
static void canonicalisePathMetaData_(
const Path & path,
std::optional<std::pair<uid_t, uid_t>> uidRange,
InodesSeen & inodesSeen)
{
checkInterrupt();
@ -630,7 +633,7 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe
However, ignore files that we chown'ed ourselves previously to
ensure that we don't fail on hard links within the same build
(i.e. "touch $out/foo; ln $out/foo $out/bar"). */
if (fromUid != (uid_t) -1 && st.st_uid != fromUid) {
if (uidRange && (st.st_uid < uidRange->first || st.st_uid > uidRange->second)) {
if (S_ISDIR(st.st_mode) || !inodesSeen.count(Inode(st.st_dev, st.st_ino)))
throw BuildError("invalid ownership on file '%1%'", path);
mode_t mode = st.st_mode & ~S_IFMT;
@ -663,14 +666,17 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe
if (S_ISDIR(st.st_mode)) {
DirEntries entries = readDirectory(path);
for (auto & i : entries)
canonicalisePathMetaData_(path + "/" + i.name, fromUid, inodesSeen);
canonicalisePathMetaData_(path + "/" + i.name, uidRange, inodesSeen);
}
}
void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen)
void canonicalisePathMetaData(
const Path & path,
std::optional<std::pair<uid_t, uid_t>> uidRange,
InodesSeen & inodesSeen)
{
canonicalisePathMetaData_(path, fromUid, inodesSeen);
canonicalisePathMetaData_(path, uidRange, inodesSeen);
/* On platforms that don't have lchown(), the top-level path can't
be a symlink, since we can't change its ownership. */
@ -683,10 +689,11 @@ void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & ino
}
void canonicalisePathMetaData(const Path & path, uid_t fromUid)
void canonicalisePathMetaData(const Path & path,
std::optional<std::pair<uid_t, uid_t>> uidRange)
{
InodesSeen inodesSeen;
canonicalisePathMetaData(path, fromUid, inodesSeen);
canonicalisePathMetaData(path, uidRange, inodesSeen);
}
@ -1331,7 +1338,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
autoGC();
canonicalisePathMetaData(realPath, -1);
canonicalisePathMetaData(realPath, {});
optimisePath(realPath, repair); // FIXME: combine with hashPath()
@ -1444,7 +1451,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
narHash = narSink.finish();
}
canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath
canonicalisePathMetaData(realPath, {}); // FIXME: merge into restorePath
optimisePath(realPath, repair);
@ -1486,7 +1493,7 @@ StorePath LocalStore::addTextToStore(
writeFile(realPath, s);
canonicalisePathMetaData(realPath, -1);
canonicalisePathMetaData(realPath, {});
StringSink sink;
dumpString(s, sink);

View file

@ -310,9 +310,18 @@ typedef std::set<Inode> InodesSeen;
- the permissions are set to 444 or 555 (i.e., read-only with or
without execute permission; setuid bits etc. are cleared)
- the owner and group are set to the Nix user and group, if we're
running as root. */
void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen);
void canonicalisePathMetaData(const Path & path, uid_t fromUid);
running as root.
If uidRange is not empty, this function will throw an error if it
encounters files owned by a user outside of the closed interval
[uidRange->first, uidRange->second].
*/
void canonicalisePathMetaData(
const Path & path,
std::optional<std::pair<uid_t, uid_t>> uidRange,
InodesSeen & inodesSeen);
void canonicalisePathMetaData(
const Path & path,
std::optional<std::pair<uid_t, uid_t>> uidRange);
void canonicaliseTimestampAndPermissions(const Path & path);

View file

@ -2,105 +2,197 @@
#include "globals.hh"
#include "pathlocks.hh"
#include <grp.h>
#include <pwd.h>
#include <fcntl.h>
#include <unistd.h>
#include <grp.h>
namespace nix {
UserLock::UserLock()
struct SimpleUserLock : UserLock
{
assert(settings.buildUsersGroup != "");
createDirs(settings.nixStateDir + "/userpool");
}
AutoCloseFD fdUserLock;
uid_t uid;
gid_t gid;
std::vector<gid_t> supplementaryGIDs;
bool UserLock::findFreeUser() {
if (enabled()) return true;
uid_t getUID() override { assert(uid); return uid; }
uid_t getUIDCount() override { return 1; }
gid_t getGID() override { assert(gid); return gid; }
/* Get the members of the build-users-group. */
struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str());
if (!gr)
throw Error("the group '%1%' specified in 'build-users-group' does not exist",
settings.buildUsersGroup);
gid = gr->gr_gid;
std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; }
/* Copy the result of getgrnam. */
Strings users;
for (char * * p = gr->gr_mem; *p; ++p) {
debug("found build user '%1%'", *p);
users.push_back(*p);
}
static std::unique_ptr<UserLock> acquire()
{
assert(settings.buildUsersGroup != "");
createDirs(settings.nixStateDir + "/userpool");
if (users.empty())
throw Error("the build users group '%1%' has no members",
settings.buildUsersGroup);
/* Get the members of the build-users-group. */
struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str());
if (!gr)
throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup);
/* Find a user account that isn't currently in use for another
build. */
for (auto & i : users) {
debug("trying user '%1%'", i);
struct passwd * pw = getpwnam(i.c_str());
if (!pw)
throw Error("the user '%1%' in the group '%2%' does not exist",
i, settings.buildUsersGroup);
fnUserLock = (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str();
AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
if (!fd)
throw SysError("opening user lock '%1%'", fnUserLock);
if (lockFile(fd.get(), ltWrite, false)) {
fdUserLock = std::move(fd);
user = i;
uid = pw->pw_uid;
/* Sanity check... */
if (uid == getuid() || uid == geteuid())
throw Error("the Nix user should not be a member of '%1%'",
settings.buildUsersGroup);
#if __linux__
/* Get the list of supplementary groups of this build user. This
is usually either empty or contains a group such as "kvm". */
int ngroups = 32; // arbitrary initial guess
supplementaryGIDs.resize(ngroups);
int err = getgrouplist(pw->pw_name, pw->pw_gid, supplementaryGIDs.data(),
&ngroups);
// Our initial size of 32 wasn't sufficient, the correct size has
// been stored in ngroups, so we try again.
if (err == -1) {
supplementaryGIDs.resize(ngroups);
err = getgrouplist(pw->pw_name, pw->pw_gid, supplementaryGIDs.data(),
&ngroups);
}
// If it failed once more, then something must be broken.
if (err == -1)
throw Error("failed to get list of supplementary groups for '%1%'",
pw->pw_name);
// Finally, trim back the GID list to its real size
supplementaryGIDs.resize(ngroups);
#endif
isEnabled = true;
return true;
/* Copy the result of getgrnam. */
Strings users;
for (char * * p = gr->gr_mem; *p; ++p) {
debug("found build user '%s'", *p);
users.push_back(*p);
}
if (users.empty())
throw Error("the build users group '%s' has no members", settings.buildUsersGroup);
/* Find a user account that isn't currently in use for another
build. */
for (auto & i : users) {
debug("trying user '%s'", i);
struct passwd * pw = getpwnam(i.c_str());
if (!pw)
throw Error("the user '%s' in the group '%s' does not exist", i, settings.buildUsersGroup);
auto fnUserLock = fmt("%s/userpool/%s", settings.nixStateDir,pw->pw_uid);
AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
if (!fd)
throw SysError("opening user lock '%s'", fnUserLock);
if (lockFile(fd.get(), ltWrite, false)) {
auto lock = std::make_unique<SimpleUserLock>();
lock->fdUserLock = std::move(fd);
lock->uid = pw->pw_uid;
lock->gid = gr->gr_gid;
/* Sanity check... */
if (lock->uid == getuid() || lock->uid == geteuid())
throw Error("the Nix user should not be a member of '%s'", settings.buildUsersGroup);
#if __linux__
/* Get the list of supplementary groups of this build
user. This is usually either empty or contains a
group such as "kvm". */
int ngroups = 32; // arbitrary initial guess
std::vector<gid_t> gids;
gids.resize(ngroups);
int err = getgrouplist(
pw->pw_name, pw->pw_gid,
gids.data(),
&ngroups);
/* Our initial size of 32 wasn't sufficient, the
correct size has been stored in ngroups, so we try
again. */
if (err == -1) {
gids.resize(ngroups);
err = getgrouplist(
pw->pw_name, pw->pw_gid,
gids.data(),
&ngroups);
}
// If it failed once more, then something must be broken.
if (err == -1)
throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name);
// Finally, trim back the GID list to its real size.
for (auto i = 0; i < ngroups; i++)
if (gids[i] != lock->gid)
lock->supplementaryGIDs.push_back(gids[i]);
#endif
return lock;
}
}
return nullptr;
}
};
return false;
}
void UserLock::kill()
struct AutoUserLock : UserLock
{
killUser(uid);
AutoCloseFD fdUserLock;
uid_t firstUid = 0;
gid_t firstGid = 0;
uid_t nrIds = 1;
/* First UID of the range held by this lock. Asserts that a non-zero
   UID has been assigned. */
uid_t getUID() override { assert(firstUid); return firstUid; }

/* Number of UIDs in the range held by this lock. The return type is
   uid_t so that it matches the virtual function declared in UserLock. */
uid_t getUIDCount() override { return nrIds; }

/* GID held by this lock. Asserts that a non-zero GID has been
   assigned. */
gid_t getGID() override { assert(firstGid); return firstGid; }

/* Auto-allocated locks always report an empty list of supplementary
   groups. */
std::vector<gid_t> getSupplementaryGIDs() override { return {}; }
/* Try to lock a free UID slot below `<nixStateDir>/userpool2`.

   The configured UID space (`settings.startId`, `settings.uidCount`)
   is divided into slots of `maxIdsPerBuild` UIDs each. Slots are
   tried in order; the first one whose lock file can be locked is
   returned as an AutoUserLock covering `nrIds` UIDs.

   `useChroot` selects the GID: when true the GID equals the first UID
   of the slot, otherwise the GID of `settings.buildUsersGroup` is
   used.

   Returns nullptr if no slot could be locked. Throws SysError if a
   lock file cannot be opened, and Error if the first UID of the slot
   belongs to an existing account or the build users group does not
   exist. */
static std::unique_ptr<UserLock> acquire(uid_t nrIds, bool useChroot)
{
    settings.requireExperimentalFeature(Xp::AutoAllocateUids);
    assert(settings.startId > 0);
    assert(settings.uidCount % maxIdsPerBuild == 0);
    assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
    assert(nrIds <= maxIdsPerBuild);

    /* The lock directory is the same for every slot, so create it
       once up front rather than on each loop iteration. */
    createDirs(settings.nixStateDir + "/userpool2");

    size_t nrSlots = settings.uidCount / maxIdsPerBuild;

    for (size_t i = 0; i < nrSlots; i++) {
        debug("trying user slot '%d'", i);

        auto fnUserLock = fmt("%s/userpool2/slot-%d", settings.nixStateDir, i);

        AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
        if (!fd)
            throw SysError("opening user lock '%s'", fnUserLock);

        if (lockFile(fd.get(), ltWrite, false)) {
            auto firstUid = settings.startId + i * maxIdsPerBuild;

            /* Refuse to hand out a UID that already has a passwd
               entry. Only the first UID of the slot is checked. */
            auto pw = getpwuid(firstUid);
            if (pw)
                throw Error("auto-allocated UID %d clashes with existing user account '%s'", firstUid, pw->pw_name);

            auto lock = std::make_unique<AutoUserLock>();
            lock->fdUserLock = std::move(fd);
            lock->firstUid = firstUid;
            if (useChroot)
                lock->firstGid = firstUid;
            else {
                struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str());
                if (!gr)
                    throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup);
                lock->firstGid = gr->gr_gid;
            }
            lock->nrIds = nrIds;
            return lock;
        }
    }

    return nullptr;
}
};
/* Acquire a build user lock. Without `settings.autoAllocateUids` a
   SimpleUserLock is requested and `nrIds` / `useChroot` are not used;
   otherwise both are forwarded to AutoUserLock::acquire(). */
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds, bool useChroot)
{
    if (!settings.autoAllocateUids)
        return SimpleUserLock::acquire();

    return AutoUserLock::acquire(nrIds, useChroot);
}
/* Whether builds should run under separate build users.

   The answer is computed on first call and cached in a function-local
   static. On Linux it is true when running as root and either a build
   users group is configured or automatic UID allocation is enabled.
   On macOS it is true when running as root with a build users group
   configured. On other platforms it is always false. */
bool useBuildUsers()
{
    #if __linux__
    /* Test `autoAllocateUids`, the same setting acquireUserLock()
       dispatches on. `startId` is asserted to be non-zero by the
       auto-allocating lock, so comparing it against 0 does not tell
       us whether auto-allocation is actually enabled. */
    static bool b = (settings.buildUsersGroup != "" || settings.autoAllocateUids) && getuid() == 0;
    return b;
    #elif __APPLE__
    static bool b = settings.buildUsersGroup != "" && getuid() == 0;
    return b;
    #else
    return false;
    #endif
}
}

View file

@ -1,37 +1,38 @@
#pragma once
#include "sync.hh"
#include "types.hh"
#include "util.hh"
#include <optional>
#include <sys/types.h>
namespace nix {
/* NOTE(review): the declarations below appear to mix members of the
   former concrete `class UserLock` with the new abstract
   `struct UserLock` interface — confirm which ones belong to the
   current header before relying on them. */
class UserLock
struct UserLock
{
private:
Path fnUserLock;
AutoCloseFD fdUserLock;
virtual ~UserLock() { }
bool isEnabled = false;
std::string user;
uid_t uid = 0;
gid_t gid = 0;
std::vector<gid_t> supplementaryGIDs;
/* Get the first and last UID, i.e. the inclusive range
   [getUID(), getUID() + getUIDCount() - 1]. */
std::pair<uid_t, uid_t> getUIDRange()
{
auto first = getUID();
return {first, first + getUIDCount() - 1};
}
public:
UserLock();
/* Get the first UID. */
virtual uid_t getUID() = 0;
void kill();
/* Get the number of UIDs in the range (see getUIDRange()). */
virtual uid_t getUIDCount() = 0;
std::string getUser() { return user; }
uid_t getUID() { assert(uid); return uid; }
uid_t getGID() { assert(gid); return gid; }
std::vector<gid_t> getSupplementaryGIDs() { return supplementaryGIDs; }
bool findFreeUser();
bool enabled() { return isEnabled; }
/* Get the GID associated with this lock. */
virtual gid_t getGID() = 0;
/* Get the supplementary GIDs associated with this lock. */
virtual std::vector<gid_t> getSupplementaryGIDs() = 0;
};
/* Acquire a user lock for a UID range of size `nrIds`. Note that this
may return nullptr if no user is available. */
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds, bool useChroot);
bool useBuildUsers();
}

View file

@ -89,6 +89,7 @@ std::optional<Strings> ParsedDerivation::getStringsAttr(const std::string & name
StringSet ParsedDerivation::getRequiredSystemFeatures() const
{
// FIXME: cache this?
StringSet res;
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
res.insert(i);
@ -124,6 +125,11 @@ bool ParsedDerivation::substitutesAllowed() const
return getBoolAttr("allowSubstitutes", true);
}
/* True when `uid-range` is among the derivation's required system
   features. */
bool ParsedDerivation::useUidRange() const
{
    const auto features = getRequiredSystemFeatures();
    return features.count("uid-range") != 0;
}
static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*");
std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths)

View file

@ -38,6 +38,8 @@ public:
bool substitutesAllowed() const;
bool useUidRange() const;
std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths);
};

View file

@ -14,6 +14,8 @@ std::map<ExperimentalFeature, std::string> stringifiedXpFeatures = {
{ Xp::NoUrlLiterals, "no-url-literals" },
{ Xp::FetchClosure, "fetch-closure" },
{ Xp::ReplFlake, "repl-flake" },
{ Xp::AutoAllocateUids, "auto-allocate-uids" },
{ Xp::Cgroups, "cgroups" },
};
const std::optional<ExperimentalFeature> parseExperimentalFeature(const std::string_view & name)

View file

@ -23,6 +23,8 @@ enum struct ExperimentalFeature
NoUrlLiterals,
FetchClosure,
ReplFlake,
AutoAllocateUids,
Cgroups,
};
/**

View file

@ -1,5 +1,6 @@
#include <sys/time.h>
#include <filesystem>
#include <atomic>
#include "finally.hh"
#include "util.hh"
@ -10,7 +11,7 @@ namespace fs = std::filesystem;
namespace nix {
static Path tempName(Path tmpRoot, const Path & prefix, bool includePid,
int & counter)
std::atomic<unsigned int> & counter)
{
tmpRoot = canonPath(tmpRoot.empty() ? getEnv("TMPDIR").value_or("/tmp") : tmpRoot, true);
if (includePid)
@ -22,9 +23,9 @@ static Path tempName(Path tmpRoot, const Path & prefix, bool includePid,
Path createTempDir(const Path & tmpRoot, const Path & prefix,
bool includePid, bool useGlobalCounter, mode_t mode)
{
static int globalCounter = 0;
int localCounter = 0;
int & counter(useGlobalCounter ? globalCounter : localCounter);
static std::atomic<unsigned int> globalCounter = 0;
std::atomic<unsigned int> localCounter = 0;
auto & counter(useGlobalCounter ? globalCounter : localCounter);
while (1) {
checkInterrupt();

View file

@ -516,7 +516,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise)
if (!store->isValidPath(info->path) || reregister) {
/* !!! races */
if (canonicalise)
canonicalisePathMetaData(store->printStorePath(info->path), -1);
canonicalisePathMetaData(store->printStorePath(info->path), {});
if (!hashGiven) {
HashResult hash = hashPath(htSHA256, store->printStorePath(info->path));
info->narHash = hash.first;

View file

@ -30,6 +30,10 @@ nlohmann::json builtPathsWithResultToJSON(const std::vector<BuiltPathWithResult>
if (b.result) {
j["startTime"] = b.result->startTime;
j["stopTime"] = b.result->stopTime;
if (b.result->cpuUser)
j["cpuUser"] = ((double) b.result->cpuUser->count()) / 1000000;
if (b.result->cpuSystem)
j["cpuSystem"] = ((double) b.result->cpuSystem->count()) / 1000000;
}
res.push_back(j);
}, b.path.raw());

68
tests/containers.nix Normal file
View file

@ -0,0 +1,68 @@
# Test whether we can run a NixOS container inside a Nix build using systemd-nspawn.
#
# Arguments:
#   nixpkgs - path to the Nixpkgs tree providing the NixOS test framework
#   system  - system type passed on to the test framework
#   overlay - Nixpkgs overlay applied to every test node
{ nixpkgs, system, overlay }:
with import (nixpkgs + "/nixos/lib/testing-python.nix") {
inherit system;
extraConfigurations = [ { nixpkgs.overlays = [ overlay ]; } ];
};
makeTest ({
name = "containers";
# A single VM named `host` on which all builds are run.
nodes =
{
host =
{ config, lib, pkgs, nodes, ... }:
{ virtualisation.writableStore = true;
virtualisation.diskSize = 2048;
# Make stdenv and the container's system closure available in the VM's
# store (presumably because the builds below run with `--offline`).
virtualisation.additionalPaths =
[ pkgs.stdenv
(import ./systemd-nspawn.nix { inherit nixpkgs; }).toplevel
];
virtualisation.memorySize = 4096;
# Force the list of binary caches to be empty.
nix.binaryCaches = lib.mkForce [ ];
# Enable the experimental features under test and advertise the
# `uid-range` system feature on the host.
nix.extraOptions =
''
extra-experimental-features = nix-command auto-allocate-uids cgroups
extra-system-features = uid-range
'';
nix.nixPath = [ "nixpkgs=${nixpkgs}" ];
};
};
# Python test driver script. The `#` lines inside the string are Python
# comments and part of the script text.
testScript = { nodes }: ''
start_all()
host.succeed("nix --version >&2")
# Test that 'id' gives the expected result in various configurations.
# Existing UIDs, sandbox.
host.succeed("nix build --no-auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-1")
host.succeed("[[ $(cat ./result) = 'uid=1000(nixbld) gid=100(nixbld) groups=100(nixbld)' ]]")
# Existing UIDs, no sandbox.
host.succeed("nix build --no-auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-2")
host.succeed("[[ $(cat ./result) = 'uid=30001(nixbld1) gid=30000(nixbld) groups=30000(nixbld)' ]]")
# Auto-allocated UIDs, sandbox.
host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-3")
host.succeed("[[ $(cat ./result) = 'uid=1000(nixbld) gid=100(nixbld) groups=100(nixbld)' ]]")
# Auto-allocated UIDs, no sandbox.
host.succeed("nix build --auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-4")
host.succeed("[[ $(cat ./result) = 'uid=872415232 gid=30000(nixbld) groups=30000(nixbld)' ]]")
# Auto-allocated UIDs, UID range, sandbox.
host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-5 --arg uidRange true")
host.succeed("[[ $(cat ./result) = 'uid=0(root) gid=0(root) groups=0(root)' ]]")
# Auto-allocated UIDs, UID range, no sandbox.
host.fail("nix build --auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-6 --arg uidRange true")
# Run systemd-nspawn in a Nix build.
host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./systemd-nspawn.nix} --argstr nixpkgs ${nixpkgs}")
host.succeed("[[ $(cat ./result/msg) = 'Hello World' ]]")
'';
})

8
tests/id-test.nix Normal file
View file

@ -0,0 +1,8 @@
# Derivation that runs `id` twice: once with its output left on stdout and
# once with its output written to $out. When `uidRange` is true the
# derivation requires the `uid-range` system feature.
{ name, uidRange ? false }:

let
  pkgs = import <nixpkgs> {};
  features = if uidRange then ["uid-range"] else [];
in

pkgs.runCommand name
  { requiredSystemFeatures = features; }
  "id; id > $out"

78
tests/systemd-nspawn.nix Normal file
View file

@ -0,0 +1,78 @@
# Evaluate a small NixOS container configuration and boot it with
# systemd-nspawn from inside a derivation that requires the `uid-range`
# system feature.
#
# Arguments:
#   nixpkgs - path to the Nixpkgs tree used to evaluate the NixOS configuration
{ nixpkgs }:
let
# NixOS module describing the container.
machine = { config, pkgs, ... }:
{
system.stateVersion = "22.05";
boot.isContainer = true;
systemd.services.console-getty.enable = false;
networking.dhcpcd.enable = false;
services.httpd = {
enable = true;
adminAddr = "nixos@example.org";
};
# Service ordered after httpd that writes the test results into $out.
systemd.services.test = {
wantedBy = [ "multi-user.target" ];
after = [ "httpd.service" ];
script = ''
source /.env
echo "Hello World" > $out/msg
ls -lR /dev > $out/dev
${pkgs.curl}/bin/curl -sS --fail http://localhost/ > $out/page.html
'';
# Ask systemd to exit once this unit has finished, whether it succeeded
# or failed; a failure uses exit status 42.
unitConfig = {
FailureAction = "exit-force";
FailureActionExitStatus = 42;
SuccessAction = "exit-force";
};
};
};
# Evaluate the module above as an x86_64-linux NixOS system.
cfg = (import (nixpkgs + "/nixos/lib/eval-config.nix") {
modules = [ machine ];
system = "x86_64-linux";
});
config = cfg.config;
in
with cfg._module.args.pkgs;
runCommand "test"
{ buildInputs = [ config.system.path ];
requiredSystemFeatures = [ "uid-range" ];
# Exposed both as the `toplevel` attribute of the result (read by
# tests/containers.nix) and as $toplevel in the build script below.
toplevel = config.system.build.toplevel;
}
''
root=$(pwd)/root
mkdir -p $root $root/etc
export > $root/.env
# Make /run a tmpfs to shut up a systemd warning.
mkdir /run
mount -t tmpfs none /run
chmod 0700 /run
mount -t cgroup2 none /sys/fs/cgroup
mkdir -p $out
touch /etc/os-release
echo a5ea3f98dedc0278b6f3cc8c37eeaeac > /etc/machine-id
SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1 \
${config.systemd.package}/bin/systemd-nspawn \
--keep-unit \
-M ${config.networking.hostName} -D "$root" \
--register=no \
--resolv-conf=off \
--bind-ro=/nix/store \
--bind=$out \
--private-network \
$toplevel/init
''