Generalize DerivationType in preparation for impure derivations

This commit is contained in:
John Ericson 2022-03-18 00:36:52 +00:00
parent 049fae155a
commit a544ed7684
9 changed files with 148 additions and 95 deletions

View file

@ -300,7 +300,7 @@ connected:
std::set<Realisation> missingRealisations; std::set<Realisation> missingRealisations;
StorePathSet missingPaths; StorePathSet missingPaths;
if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !derivationHasKnownOutputPaths(drv.type())) { if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !drv.type().hasKnownOutputPaths()) {
for (auto & outputName : wantedOutputs) { for (auto & outputName : wantedOutputs) {
auto thisOutputHash = outputHashes.at(outputName); auto thisOutputHash = outputHashes.at(outputName);
auto thisOutputId = DrvOutput{ thisOutputHash, outputName }; auto thisOutputId = DrvOutput{ thisOutputHash, outputName };

View file

@ -1235,11 +1235,8 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
/* Optimisation, but required in read-only mode! because in that /* Optimisation, but required in read-only mode! because in that
case we don't actually write store derivations, so we can't case we don't actually write store derivations, so we can't
read them later. read them later. */
{
However, we don't bother doing this for floating CA derivations because
their "hash modulo" is indeterminate until built. */
if (drv.type() != DerivationType::CAFloating) {
auto h = hashDerivationModulo(*state.store, drv, false); auto h = hashDerivationModulo(*state.store, drv, false);
drvHashes.lock()->insert_or_assign(drvPath, h); drvHashes.lock()->insert_or_assign(drvPath, h);
} }

View file

@ -204,7 +204,7 @@ void DerivationGoal::haveDerivation()
{ {
trace("have derivation"); trace("have derivation");
if (drv->type() == DerivationType::CAFloating) if (!drv->type().hasKnownOutputPaths())
settings.requireExperimentalFeature(Xp::CaDerivations); settings.requireExperimentalFeature(Xp::CaDerivations);
retrySubstitution = false; retrySubstitution = false;
@ -440,9 +440,28 @@ void DerivationGoal::inputsRealised()
if (useDerivation) { if (useDerivation) {
auto & fullDrv = *dynamic_cast<Derivation *>(drv.get()); auto & fullDrv = *dynamic_cast<Derivation *>(drv.get());
if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && auto drvType = fullDrv.type();
((!fullDrv.inputDrvs.empty() && derivationIsCA(fullDrv.type())) bool resolveDrv = std::visit(overloaded {
|| fullDrv.type() == DerivationType::DeferredInputAddressed)) { [&](const DerivationType::InputAddressed & ia) {
/* must resolve if deferred. */
return ia.deferred;
},
[&](const DerivationType::ContentAddressed & ca) {
return !fullDrv.inputDrvs.empty() && (
ca.fixed
/* Can optionally resolve if fixed, which is good
for avoiding unnecessary rebuilds. */
? settings.isExperimentalFeatureEnabled(Xp::CaDerivations)
/* Must resolve if floating and there are any input
drvs. */
: true);
},
}, drvType.raw());
if (resolveDrv)
{
settings.requireExperimentalFeature(Xp::CaDerivations);
/* We are able to resolve this derivation based on the /* We are able to resolve this derivation based on the
now-known results of dependencies. If so, we become a stub goal now-known results of dependencies. If so, we become a stub goal
aliasing that resolved derivation goal */ aliasing that resolved derivation goal */
@ -501,7 +520,7 @@ void DerivationGoal::inputsRealised()
/* Don't repeat fixed-output derivations since they're already /* Don't repeat fixed-output derivations since they're already
verified by their output hash.*/ verified by their output hash.*/
nrRounds = derivationIsFixed(derivationType) ? 1 : settings.buildRepeat + 1; nrRounds = derivationType.isFixed() ? 1 : settings.buildRepeat + 1;
/* Okay, try to build. Note that here we don't wait for a build /* Okay, try to build. Note that here we don't wait for a build
slot to become available, since we don't need one if there is a slot to become available, since we don't need one if there is a
@ -908,7 +927,7 @@ void DerivationGoal::buildDone()
st = st =
dynamic_cast<NotDeterministic*>(&e) ? BuildResult::NotDeterministic : dynamic_cast<NotDeterministic*>(&e) ? BuildResult::NotDeterministic :
statusOk(status) ? BuildResult::OutputRejected : statusOk(status) ? BuildResult::OutputRejected :
derivationIsImpure(derivationType) || diskFull ? BuildResult::TransientFailure : derivationType.isImpure() || diskFull ? BuildResult::TransientFailure :
BuildResult::PermanentFailure; BuildResult::PermanentFailure;
} }
@ -1221,7 +1240,7 @@ void DerivationGoal::flushLine()
std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDerivationOutputMap() std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDerivationOutputMap()
{ {
if (!useDerivation || drv->type() != DerivationType::CAFloating) { if (!useDerivation || drv->type().hasKnownOutputPaths()) {
std::map<std::string, std::optional<StorePath>> res; std::map<std::string, std::optional<StorePath>> res;
for (auto & [name, output] : drv->outputs) for (auto & [name, output] : drv->outputs)
res.insert_or_assign(name, output.path(worker.store, drv->name, name)); res.insert_or_assign(name, output.path(worker.store, drv->name, name));
@ -1233,7 +1252,7 @@ std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDeri
OutputPathMap DerivationGoal::queryDerivationOutputMap() OutputPathMap DerivationGoal::queryDerivationOutputMap()
{ {
if (!useDerivation || drv->type() != DerivationType::CAFloating) { if (!useDerivation || drv->type().hasKnownOutputPaths()) {
OutputPathMap res; OutputPathMap res;
for (auto & [name, output] : drv->outputsAndOptPaths(worker.store)) for (auto & [name, output] : drv->outputsAndOptPaths(worker.store))
res.insert_or_assign(name, *output.second); res.insert_or_assign(name, *output.second);

View file

@ -395,7 +395,7 @@ void LocalDerivationGoal::startBuilder()
else if (settings.sandboxMode == smDisabled) else if (settings.sandboxMode == smDisabled)
useChroot = false; useChroot = false;
else if (settings.sandboxMode == smRelaxed) else if (settings.sandboxMode == smRelaxed)
useChroot = !(derivationIsImpure(derivationType)) && !noChroot; useChroot = !(derivationType.isImpure()) && !noChroot;
} }
auto & localStore = getLocalStore(); auto & localStore = getLocalStore();
@ -608,7 +608,7 @@ void LocalDerivationGoal::startBuilder()
"nogroup:x:65534:\n", sandboxGid())); "nogroup:x:65534:\n", sandboxGid()));
/* Create /etc/hosts with localhost entry. */ /* Create /etc/hosts with localhost entry. */
if (!(derivationIsImpure(derivationType))) if (!(derivationType.isImpure()))
writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n::1 localhost\n"); writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n::1 localhost\n");
/* Make the closure of the inputs available in the chroot, /* Make the closure of the inputs available in the chroot,
@ -796,7 +796,7 @@ void LocalDerivationGoal::startBuilder()
us. us.
*/ */
if (!(derivationIsImpure(derivationType))) if (!(derivationType.isImpure()))
privateNetwork = true; privateNetwork = true;
userNamespaceSync.create(); userNamespaceSync.create();
@ -1049,7 +1049,7 @@ void LocalDerivationGoal::initEnv()
derivation, tell the builder, so that for instance `fetchurl' derivation, tell the builder, so that for instance `fetchurl'
can skip checking the output. On older Nixes, this environment can skip checking the output. On older Nixes, this environment
variable won't be set, so `fetchurl' will do the check. */ variable won't be set, so `fetchurl' will do the check. */
if (derivationIsFixed(derivationType)) env["NIX_OUTPUT_CHECKED"] = "1"; if (derivationType.isFixed()) env["NIX_OUTPUT_CHECKED"] = "1";
/* *Only* if this is a fixed-output derivation, propagate the /* *Only* if this is a fixed-output derivation, propagate the
values of the environment variables specified in the values of the environment variables specified in the
@ -1060,7 +1060,7 @@ void LocalDerivationGoal::initEnv()
to the builder is generally impure, but the output of to the builder is generally impure, but the output of
fixed-output derivations is by definition pure (since we fixed-output derivations is by definition pure (since we
already know the cryptographic hash of the output). */ already know the cryptographic hash of the output). */
if (derivationIsImpure(derivationType)) { if (derivationType.isImpure()) {
for (auto & i : parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings())) for (auto & i : parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings()))
env[i] = getEnv(i).value_or(""); env[i] = getEnv(i).value_or("");
} }
@ -1674,7 +1674,7 @@ void LocalDerivationGoal::runChild()
/* Fixed-output derivations typically need to access the /* Fixed-output derivations typically need to access the
network, so give them access to /etc/resolv.conf and so network, so give them access to /etc/resolv.conf and so
on. */ on. */
if (derivationIsImpure(derivationType)) { if (derivationType.isImpure()) {
// Only use nss functions to resolve hosts and // Only use nss functions to resolve hosts and
// services. Dont use it for anything else that may // services. Dont use it for anything else that may
// be configured for this system. This limits the // be configured for this system. This limits the
@ -1918,7 +1918,7 @@ void LocalDerivationGoal::runChild()
sandboxProfile += "(import \"sandbox-defaults.sb\")\n"; sandboxProfile += "(import \"sandbox-defaults.sb\")\n";
if (derivationIsImpure(derivationType)) if (derivationType.isImpure())
sandboxProfile += "(import \"sandbox-network.sb\")\n"; sandboxProfile += "(import \"sandbox-network.sb\")\n";
/* Add the output paths we'll use at build-time to the chroot */ /* Add the output paths we'll use at build-time to the chroot */

View file

@ -560,6 +560,8 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
BuildMode buildMode = (BuildMode) readInt(from); BuildMode buildMode = (BuildMode) readInt(from);
logger->startWork(); logger->startWork();
auto drvType = drv.type();
/* Content-addressed derivations are trustless because their output paths /* Content-addressed derivations are trustless because their output paths
are verified by their content alone, so any derivation is free to are verified by their content alone, so any derivation is free to
try to produce such a path. try to produce such a path.
@ -592,12 +594,12 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
derivations, we throw out the precomputed output paths and just derivations, we throw out the precomputed output paths and just
store the hashes, so there aren't two competing sources of truth an store the hashes, so there aren't two competing sources of truth an
attacker could exploit. */ attacker could exploit. */
if (drv.type() == DerivationType::InputAddressed && !trusted) if (!(drvType.isCA() || trusted))
throw Error("you are not privileged to build input-addressed derivations"); throw Error("you are not privileged to build input-addressed derivations");
/* Make sure that the non-input-addressed derivations that got this far /* Make sure that the non-input-addressed derivations that got this far
are in fact content-addressed if we don't trust them. */ are in fact content-addressed if we don't trust them. */
assert(derivationIsCA(drv.type()) || trusted); assert(drvType.isCA() || trusted);
/* Recompute the derivation path when we cannot trust the original. */ /* Recompute the derivation path when we cannot trust the original. */
if (!trusted) { if (!trusted) {
@ -606,7 +608,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
original not-necessarily-resolved derivation to verify the drv original not-necessarily-resolved derivation to verify the drv
derivation as adequate claim to the input-addressed output derivation as adequate claim to the input-addressed output
paths. */ paths. */
assert(derivationIsCA(drv.type())); assert(drvType.isCA());
Derivation drv2; Derivation drv2;
static_cast<BasicDerivation &>(drv2) = drv; static_cast<BasicDerivation &>(drv2) = drv;

View file

@ -36,47 +36,46 @@ StorePath DerivationOutput::CAFixed::path(const Store & store, std::string_view
} }
bool derivationIsCA(DerivationType dt) { bool DerivationType::isCA() const {
switch (dt) { /* Normally we do the full `std::visit` to make sure we have
case DerivationType::InputAddressed: return false; exhaustively handled all variants, but so long as there is a
case DerivationType::CAFixed: return true; variant called `ContentAddressed`, it must be the only one for
case DerivationType::CAFloating: return true; which `isCA` is true for this to make sense! */
case DerivationType::DeferredInputAddressed: return false; return std::holds_alternative<ContentAddressed>(raw());
};
// Since enums can have non-variant values, but making a `default:` would
// disable exhaustiveness warnings.
assert(false);
} }
bool derivationIsFixed(DerivationType dt) { bool DerivationType::isFixed() const {
switch (dt) { return std::visit(overloaded {
case DerivationType::InputAddressed: return false; [](const InputAddressed & ia) {
case DerivationType::CAFixed: return true; return false;
case DerivationType::CAFloating: return false; },
case DerivationType::DeferredInputAddressed: return false; [](const ContentAddressed & ca) {
}; return ca.fixed;
assert(false); },
}, raw());
} }
bool derivationHasKnownOutputPaths(DerivationType dt) { bool DerivationType::hasKnownOutputPaths() const {
switch (dt) { return std::visit(overloaded {
case DerivationType::InputAddressed: return true; [](const InputAddressed & ia) {
case DerivationType::CAFixed: return true; return !ia.deferred;
case DerivationType::CAFloating: return false; },
case DerivationType::DeferredInputAddressed: return false; [](const ContentAddressed & ca) {
}; return ca.fixed;
assert(false); },
}, raw());
} }
bool derivationIsImpure(DerivationType dt) { bool DerivationType::isImpure() const {
switch (dt) { return std::visit(overloaded {
case DerivationType::InputAddressed: return false; [](const InputAddressed & ia) {
case DerivationType::CAFixed: return true; return false;
case DerivationType::CAFloating: return false; },
case DerivationType::DeferredInputAddressed: return false; [](const ContentAddressed & ca) {
}; return !ca.pure;
assert(false); },
}, raw());
} }
@ -439,18 +438,28 @@ DerivationType BasicDerivation::type() const
if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
throw Error("Must have at least one output"); throw Error("Must have at least one output");
} else if (! inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { } else if (! inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
return DerivationType::InputAddressed; return DerivationType::InputAddressed {
.deferred = false,
};
} else if (inputAddressedOutputs.empty() && ! fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { } else if (inputAddressedOutputs.empty() && ! fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
if (fixedCAOutputs.size() > 1) if (fixedCAOutputs.size() > 1)
// FIXME: Experimental feature? // FIXME: Experimental feature?
throw Error("Only one fixed output is allowed for now"); throw Error("Only one fixed output is allowed for now");
if (*fixedCAOutputs.begin() != "out") if (*fixedCAOutputs.begin() != "out")
throw Error("Single fixed output must be named \"out\""); throw Error("Single fixed output must be named \"out\"");
return DerivationType::CAFixed; return DerivationType::ContentAddressed {
.pure = false,
.fixed = true,
};
} else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && ! floatingCAOutputs.empty() && deferredIAOutputs.empty()) { } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && ! floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
return DerivationType::CAFloating; return DerivationType::ContentAddressed {
.pure = true,
.fixed = false,
};
} else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && !deferredIAOutputs.empty()) { } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && !deferredIAOutputs.empty()) {
return DerivationType::DeferredInputAddressed; return DerivationType::InputAddressed {
.deferred = true,
};
} else { } else {
throw Error("Can't mix derivation output types"); throw Error("Can't mix derivation output types");
} }
@ -502,10 +511,10 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath &
*/ */
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{ {
auto kind = DrvHash::Kind::Regular; auto type = drv.type();
/* Return a fixed hash for fixed-output derivations. */ /* Return a fixed hash for fixed-output derivations. */
switch (drv.type()) { if (type.isFixed()) {
case DerivationType::CAFixed: {
std::map<std::string, Hash> outputHashes; std::map<std::string, Hash> outputHashes;
for (const auto & i : drv.outputs) { for (const auto & i : drv.outputs) {
auto & dof = std::get<DerivationOutput::CAFixed>(i.second.raw()); auto & dof = std::get<DerivationOutput::CAFixed>(i.second.raw());
@ -517,14 +526,19 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
} }
return outputHashes; return outputHashes;
} }
case DerivationType::CAFloating:
kind = DrvHash::Kind::Deferred; auto kind = std::visit(overloaded {
break; [](const DerivationType::InputAddressed & ia) {
case DerivationType::InputAddressed: /* This might be a "pessimistically" deferred output, so we don't
break; "taint" the kind yet. */
case DerivationType::DeferredInputAddressed: return DrvHash::Kind::Regular;
break; },
} [](const DerivationType::ContentAddressed & ca) {
return ca.fixed
? DrvHash::Kind::Regular
: DrvHash::Kind::Deferred;
},
}, drv.type().raw());
/* For other derivations, replace the inputs paths with recursive /* For other derivations, replace the inputs paths with recursive
calls to this function. */ calls to this function. */

View file

@ -85,30 +85,50 @@ typedef std::map<std::string, std::pair<DerivationOutput, std::optional<StorePat
output IDs we are interested in. */ output IDs we are interested in. */
typedef std::map<StorePath, StringSet> DerivationInputs; typedef std::map<StorePath, StringSet> DerivationInputs;
enum struct DerivationType : uint8_t { struct DerivationType_InputAddressed {
InputAddressed, bool deferred;
DeferredInputAddressed,
CAFixed,
CAFloating,
}; };
/* Do the outputs of the derivation have paths calculated from their content, struct DerivationType_ContentAddressed {
or from the derivation itself? */ bool pure;
bool derivationIsCA(DerivationType); bool fixed;
};
/* Is the content of the outputs fixed a-priori via a hash? Never true for typedef std::variant<
non-CA derivations. */ DerivationType_InputAddressed,
bool derivationIsFixed(DerivationType); DerivationType_ContentAddressed
> _DerivationTypeRaw;
/* Is the derivation impure and needs to access non-deterministic resources, or struct DerivationType : _DerivationTypeRaw {
pure and can be sandboxed? Note that whether or not we actually sandbox the using Raw = _DerivationTypeRaw;
derivation is controlled separately. Never true for non-CA derivations. */ using Raw::Raw;
bool derivationIsImpure(DerivationType); using InputAddressed = DerivationType_InputAddressed;
using ContentAddressed = DerivationType_ContentAddressed;
/* Does the derivation knows its own output paths?
* Only true when there's no floating-ca derivation involved in the closure. /* Do the outputs of the derivation have paths calculated from their content,
*/ or from the derivation itself? */
bool derivationHasKnownOutputPaths(DerivationType); bool isCA() const;
/* Is the content of the outputs fixed a-priori via a hash? Never true for
non-CA derivations. */
bool isFixed() const;
/* Is the derivation impure and needs to access non-deterministic resources, or
pure and can be sandboxed? Note that whether or not we actually sandbox the
derivation is controlled separately. Never true for non-CA derivations. */
bool isImpure() const;
/* Does the derivation know its own output paths?
Only true when there's no floating-ca derivation involved in the
closure, or if fixed output.
*/
bool hasKnownOutputPaths() const;
inline const Raw & raw() const {
return static_cast<const Raw &>(*this);
}
};
struct BasicDerivation struct BasicDerivation
{ {
@ -189,11 +209,11 @@ typedef std::map<std::string, Hash> CaOutputHashes;
struct DrvHash { struct DrvHash {
Hash hash; Hash hash;
enum struct Kind { enum struct Kind: bool {
// Statically determined derivations. // Statically determined derivations.
// This hash will be directly used to compute the output paths // This hash will be directly used to compute the output paths
Regular, Regular,
// Floating-output derivations (and their dependencies). // Floating-output derivations (and their reverse dependencies).
Deferred, Deferred,
}; };

View file

@ -718,6 +718,7 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
/* Nothing to check */ /* Nothing to check */
}, },
[&](const DerivationOutput::Deferred &) { [&](const DerivationOutput::Deferred &) {
/* Nothing to check */
}, },
}, i.second.raw()); }, i.second.raw());
} }

View file

@ -93,7 +93,7 @@ StringSet ParsedDerivation::getRequiredSystemFeatures() const
StringSet res; StringSet res;
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
res.insert(i); res.insert(i);
if (!derivationHasKnownOutputPaths(drv.type())) if (!drv.type().hasKnownOutputPaths())
res.insert("ca-derivations"); res.insert("ca-derivations");
return res; return res;
} }