Generalize DerivationType in preparation for impure derivations

2024-11-22 23:02:27 +00:00 · 2022-03-18 00:36:52 +00:00 · 2022-03-18 00:36:52 +00:00 · a544ed7684
commit a544ed7684
parent 049fae155a
9 changed files with 148 additions and 95 deletions
--- a/src/build-remote/build-remote.cc
+++ b/src/build-remote/build-remote.cc
@ -300,7 +300,7 @@ connected:

        std::set<Realisation> missingRealisations;
        StorePathSet missingPaths;
-        if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !derivationHasKnownOutputPaths(drv.type())) {
+        if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !drv.type().hasKnownOutputPaths()) {
            for (auto & outputName : wantedOutputs) {
                auto thisOutputHash = outputHashes.at(outputName);
                auto thisOutputId = DrvOutput{ thisOutputHash, outputName };
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@ -1235,11 +1235,8 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *

    /* Optimisation, but required in read-only mode! because in that
       case we don't actually write store derivations, so we can't
-       read them later.
-
-       However, we don't bother doing this for floating CA derivations because
-       their "hash modulo" is indeterminate until built. */
-    if (drv.type() != DerivationType::CAFloating) {
+       read them later. */
+    {
        auto h = hashDerivationModulo(*state.store, drv, false);
        drvHashes.lock()->insert_or_assign(drvPath, h);
    }
--- a/src/libstore/build/derivation-goal.cc
+++ b/src/libstore/build/derivation-goal.cc
@ -204,7 +204,7 @@ void DerivationGoal::haveDerivation()
 {
    trace("have derivation");

-    if (drv->type() == DerivationType::CAFloating)
+    if (!drv->type().hasKnownOutputPaths())
        settings.requireExperimentalFeature(Xp::CaDerivations);

    retrySubstitution = false;
@ -440,9 +440,28 @@ void DerivationGoal::inputsRealised()
    if (useDerivation) {
        auto & fullDrv = *dynamic_cast<Derivation *>(drv.get());

-        if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) &&
-            ((!fullDrv.inputDrvs.empty() && derivationIsCA(fullDrv.type()))
-            || fullDrv.type() == DerivationType::DeferredInputAddressed)) {
+        auto drvType = fullDrv.type();
+        bool resolveDrv = std::visit(overloaded {
+            [&](const DerivationType::InputAddressed & ia) {
+                /* must resolve if deferred. */
+                return ia.deferred;
+            },
+            [&](const DerivationType::ContentAddressed & ca) {
+                return !fullDrv.inputDrvs.empty() && (
+                    ca.fixed
+                    /* Can optionally resolve if fixed, which is good
+                       for avoiding unnecessary rebuilds. */
+                    ? settings.isExperimentalFeatureEnabled(Xp::CaDerivations)
+                    /* Must resolve if floating and there are any input
+                       drvs. */
+                    : true);
+            },
+        }, drvType.raw());
+
+        if (resolveDrv)
+        {
+            settings.requireExperimentalFeature(Xp::CaDerivations);
+
            /* We are be able to resolve this derivation based on the
               now-known results of dependencies. If so, we become a stub goal
               aliasing that resolved derivation goal */
@ -501,7 +520,7 @@ void DerivationGoal::inputsRealised()

    /* Don't repeat fixed-output derivations since they're already
       verified by their output hash.*/
-    nrRounds = derivationIsFixed(derivationType) ? 1 : settings.buildRepeat + 1;
+    nrRounds = derivationType.isFixed() ? 1 : settings.buildRepeat + 1;

    /* Okay, try to build.  Note that here we don't wait for a build
       slot to become available, since we don't need one if there is a
@ -908,7 +927,7 @@ void DerivationGoal::buildDone()
            st =
                dynamic_cast<NotDeterministic*>(&e) ? BuildResult::NotDeterministic :
                statusOk(status) ? BuildResult::OutputRejected :
-                derivationIsImpure(derivationType) || diskFull ? BuildResult::TransientFailure :
+                derivationType.isImpure() || diskFull ? BuildResult::TransientFailure :
                BuildResult::PermanentFailure;
        }

@ -1221,7 +1240,7 @@ void DerivationGoal::flushLine()

 std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDerivationOutputMap()
 {
-    if (!useDerivation || drv->type() != DerivationType::CAFloating) {
+    if (!useDerivation || drv->type().hasKnownOutputPaths()) {
        std::map<std::string, std::optional<StorePath>> res;
        for (auto & [name, output] : drv->outputs)
            res.insert_or_assign(name, output.path(worker.store, drv->name, name));
@ -1233,7 +1252,7 @@ std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDeri

 OutputPathMap DerivationGoal::queryDerivationOutputMap()
 {
-    if (!useDerivation || drv->type() != DerivationType::CAFloating) {
+    if (!useDerivation || drv->type().hasKnownOutputPaths()) {
        OutputPathMap res;
        for (auto & [name, output] : drv->outputsAndOptPaths(worker.store))
            res.insert_or_assign(name, *output.second);
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@ -395,7 +395,7 @@ void LocalDerivationGoal::startBuilder()
        else if (settings.sandboxMode == smDisabled)
            useChroot = false;
        else if (settings.sandboxMode == smRelaxed)
-            useChroot = !(derivationIsImpure(derivationType)) && !noChroot;
+            useChroot = !(derivationType.isImpure()) && !noChroot;
    }

    auto & localStore = getLocalStore();
@ -608,7 +608,7 @@ void LocalDerivationGoal::startBuilder()
                "nogroup:x:65534:\n", sandboxGid()));

        /* Create /etc/hosts with localhost entry. */
-        if (!(derivationIsImpure(derivationType)))
+        if (!(derivationType.isImpure()))
            writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n::1 localhost\n");

        /* Make the closure of the inputs available in the chroot,
@ -796,7 +796,7 @@ void LocalDerivationGoal::startBuilder()
           us.
        */

-        if (!(derivationIsImpure(derivationType)))
+        if (!(derivationType.isImpure()))
            privateNetwork = true;

        userNamespaceSync.create();
@ -1049,7 +1049,7 @@ void LocalDerivationGoal::initEnv()
       derivation, tell the builder, so that for instance `fetchurl'
       can skip checking the output.  On older Nixes, this environment
       variable won't be set, so `fetchurl' will do the check. */
-    if (derivationIsFixed(derivationType)) env["NIX_OUTPUT_CHECKED"] = "1";
+    if (derivationType.isFixed()) env["NIX_OUTPUT_CHECKED"] = "1";

    /* *Only* if this is a fixed-output derivation, propagate the
       values of the environment variables specified in the
@ -1060,7 +1060,7 @@ void LocalDerivationGoal::initEnv()
       to the builder is generally impure, but the output of
       fixed-output derivations is by definition pure (since we
       already know the cryptographic hash of the output). */
-    if (derivationIsImpure(derivationType)) {
+    if (derivationType.isImpure()) {
        for (auto & i : parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings()))
            env[i] = getEnv(i).value_or("");
    }
@ -1674,7 +1674,7 @@ void LocalDerivationGoal::runChild()
            /* Fixed-output derivations typically need to access the
               network, so give them access to /etc/resolv.conf and so
               on. */
-            if (derivationIsImpure(derivationType)) {
+            if (derivationType.isImpure()) {
                // Only use nss functions to resolve hosts and
                // services. Don’t use it for anything else that may
                // be configured for this system. This limits the
@ -1918,7 +1918,7 @@ void LocalDerivationGoal::runChild()

                sandboxProfile += "(import \"sandbox-defaults.sb\")\n";

-                if (derivationIsImpure(derivationType))
+                if (derivationType.isImpure())
                    sandboxProfile += "(import \"sandbox-network.sb\")\n";

                /* Add the output paths we'll use at build-time to the chroot */
--- a/src/libstore/daemon.cc
+++ b/src/libstore/daemon.cc
@ -560,6 +560,8 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
        BuildMode buildMode = (BuildMode) readInt(from);
        logger->startWork();

+        auto drvType = drv.type();
+
        /* Content-addressed derivations are trustless because their output paths
           are verified by their content alone, so any derivation is free to
           try to produce such a path.
@ -592,12 +594,12 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
           derivations, we throw out the precomputed output paths and just
           store the hashes, so there aren't two competing sources of truth an
           attacker could exploit. */
-        if (drv.type() == DerivationType::InputAddressed && !trusted)
+        if (!(drvType.isCA() || trusted))
            throw Error("you are not privileged to build input-addressed derivations");

        /* Make sure that the non-input-addressed derivations that got this far
           are in fact content-addressed if we don't trust them. */
-        assert(derivationIsCA(drv.type()) || trusted);
+        assert(drvType.isCA() || trusted);

        /* Recompute the derivation path when we cannot trust the original. */
        if (!trusted) {
@ -606,7 +608,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
               original not-necessarily-resolved derivation to verify the drv
               derivation as adequate claim to the input-addressed output
               paths. */
-            assert(derivationIsCA(drv.type()));
+            assert(drvType.isCA());

            Derivation drv2;
            static_cast<BasicDerivation &>(drv2) = drv;
--- a/src/libstore/derivations.cc
+++ b/src/libstore/derivations.cc
@ -36,47 +36,46 @@ StorePath DerivationOutput::CAFixed::path(const Store & store, std::string_view
 }


-bool derivationIsCA(DerivationType dt) {
-    switch (dt) {
-    case DerivationType::InputAddressed: return false;
-    case DerivationType::CAFixed: return true;
-    case DerivationType::CAFloating: return true;
-    case DerivationType::DeferredInputAddressed: return false;
-    };
-    // Since enums can have non-variant values, but making a `default:` would
-    // disable exhaustiveness warnings.
-    assert(false);
+bool DerivationType::isCA() const {
+    /* Normally we do the full `std::visit` to make sure we have
+       exhaustively handled all variants, but so long as there is a
+       variant called `ContentAddressed`, it must be the only one for
+       which `isCA` is true for this to make sense. */
+    return std::holds_alternative<ContentAddressed>(raw());
 }

-bool derivationIsFixed(DerivationType dt) {
-    switch (dt) {
-    case DerivationType::InputAddressed: return false;
-    case DerivationType::CAFixed: return true;
-    case DerivationType::CAFloating: return false;
-    case DerivationType::DeferredInputAddressed: return false;
-    };
-    assert(false);
+bool DerivationType::isFixed() const {
+    return std::visit(overloaded {
+        [](const InputAddressed & ia) {
+            return false;
+        },
+        [](const ContentAddressed & ca) {
+            return ca.fixed;
+        },
+    }, raw());
 }

-bool derivationHasKnownOutputPaths(DerivationType dt) {
-    switch (dt) {
-    case DerivationType::InputAddressed: return true;
-    case DerivationType::CAFixed: return true;
-    case DerivationType::CAFloating: return false;
-    case DerivationType::DeferredInputAddressed: return false;
-    };
-    assert(false);
+bool DerivationType::hasKnownOutputPaths() const {
+    return std::visit(overloaded {
+        [](const InputAddressed & ia) {
+            return !ia.deferred;
+        },
+        [](const ContentAddressed & ca) {
+            return ca.fixed;
+        },
+    }, raw());
 }


-bool derivationIsImpure(DerivationType dt) {
-    switch (dt) {
-    case DerivationType::InputAddressed: return false;
-    case DerivationType::CAFixed: return true;
-    case DerivationType::CAFloating: return false;
-    case DerivationType::DeferredInputAddressed: return false;
-    };
-    assert(false);
+bool DerivationType::isImpure() const {
+    return std::visit(overloaded {
+        [](const InputAddressed & ia) {
+            return false;
+        },
+        [](const ContentAddressed & ca) {
+            return !ca.pure;
+        },
+    }, raw());
 }


@ -439,18 +438,28 @@ DerivationType BasicDerivation::type() const
    if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
        throw Error("Must have at least one output");
    } else if (! inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
-        return DerivationType::InputAddressed;
+        return DerivationType::InputAddressed {
+            .deferred = false,
+        };
    } else if (inputAddressedOutputs.empty() && ! fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
        if (fixedCAOutputs.size() > 1)
            // FIXME: Experimental feature?
            throw Error("Only one fixed output is allowed for now");
        if (*fixedCAOutputs.begin() != "out")
            throw Error("Single fixed output must be named \"out\"");
-        return DerivationType::CAFixed;
+        return DerivationType::ContentAddressed {
+            .pure = false,
+            .fixed = true,
+        };
    } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && ! floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
-        return DerivationType::CAFloating;
+        return DerivationType::ContentAddressed {
+            .pure = true,
+            .fixed = false,
+        };
    } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && !deferredIAOutputs.empty()) {
-        return DerivationType::DeferredInputAddressed;
+        return DerivationType::InputAddressed {
+            .deferred = true,
+        };
    } else {
        throw Error("Can't mix derivation output types");
    }
@ -502,10 +511,10 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath &
 */
 DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
 {
-    auto kind = DrvHash::Kind::Regular;
+    auto type = drv.type();
+
    /* Return a fixed hash for fixed-output derivations. */
-    switch (drv.type()) {
-    case DerivationType::CAFixed: {
+    if (type.isFixed()) {
        std::map<std::string, Hash> outputHashes;
        for (const auto & i : drv.outputs) {
            auto & dof = std::get<DerivationOutput::CAFixed>(i.second.raw());
@ -517,14 +526,19 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
        }
        return outputHashes;
    }
-    case DerivationType::CAFloating:
-        kind = DrvHash::Kind::Deferred;
-        break;
-    case DerivationType::InputAddressed:
-        break;
-    case DerivationType::DeferredInputAddressed:
-        break;
-    }
+
+    auto kind = std::visit(overloaded {
+        [](const DerivationType::InputAddressed & ia) {
+            /* This might be a "pessimistically" deferred output, so we don't
+               "taint" the kind yet. */
+            return DrvHash::Kind::Regular;
+        },
+        [](const DerivationType::ContentAddressed & ca) {
+            return ca.fixed
+                ? DrvHash::Kind::Regular
+                : DrvHash::Kind::Deferred;
+        },
+    }, drv.type().raw());

    /* For other derivations, replace the inputs paths with recursive
       calls to this function. */
--- a/src/libstore/derivations.hh
+++ b/src/libstore/derivations.hh
@ -85,30 +85,50 @@ typedef std::map<std::string, std::pair<DerivationOutput, std::optional<StorePat
   output IDs we are interested in. */
 typedef std::map<StorePath, StringSet> DerivationInputs;

-enum struct DerivationType : uint8_t {
-    InputAddressed,
-    DeferredInputAddressed,
-    CAFixed,
-    CAFloating,
+struct DerivationType_InputAddressed {
+    bool deferred;
 };

-/* Do the outputs of the derivation have paths calculated from their content,
-   or from the derivation itself? */
-bool derivationIsCA(DerivationType);
+struct DerivationType_ContentAddressed {
+    bool pure;
+    bool fixed;
+};

-/* Is the content of the outputs fixed a-priori via a hash? Never true for
-   non-CA derivations. */
-bool derivationIsFixed(DerivationType);
+typedef std::variant<
+    DerivationType_InputAddressed,
+    DerivationType_ContentAddressed
+> _DerivationTypeRaw;

-/* Is the derivation impure and needs to access non-deterministic resources, or
-   pure and can be sandboxed? Note that whether or not we actually sandbox the
-   derivation is controlled separately. Never true for non-CA derivations. */
-bool derivationIsImpure(DerivationType);
+struct DerivationType : _DerivationTypeRaw {
+    using Raw = _DerivationTypeRaw;
+    using Raw::Raw;
+    using InputAddressed = DerivationType_InputAddressed;
+    using ContentAddressed = DerivationType_ContentAddressed;

-/* Does the derivation knows its own output paths?
- * Only true when there's no floating-ca derivation involved in the closure.
- */
-bool derivationHasKnownOutputPaths(DerivationType);
+
+    /* Do the outputs of the derivation have paths calculated from their content,
+       or from the derivation itself? */
+    bool isCA() const;
+
+    /* Is the content of the outputs fixed a-priori via a hash? Never true for
+       non-CA derivations. */
+    bool isFixed() const;
+
+    /* Is the derivation impure and needs to access non-deterministic resources, or
+       pure and can be sandboxed? Note that whether or not we actually sandbox the
+       derivation is controlled separately. Never true for non-CA derivations. */
+    bool isImpure() const;
+
+    /* Does the derivation know its own output paths?
+       Only true when there's no floating-ca derivation involved in the
+       closure, or if fixed output.
+     */
+    bool hasKnownOutputPaths() const;
+
+    inline const Raw & raw() const {
+        return static_cast<const Raw &>(*this);
+    }
+};

 struct BasicDerivation
 {
@ -189,11 +209,11 @@ typedef std::map<std::string, Hash> CaOutputHashes;
 struct DrvHash {
    Hash hash;

-    enum struct Kind {
+    enum struct Kind: bool {
        // Statically determined derivations.
        // This hash will be directly used to compute the output paths
        Regular,
-        // Floating-output derivations (and their dependencies).
+        // Floating-output derivations (and their reverse dependencies).
        Deferred,
    };

--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@ -718,6 +718,7 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
                /* Nothing to check */
            },
            [&](const DerivationOutput::Deferred &) {
+                /* Nothing to check */
            },
        }, i.second.raw());
    }
--- a/src/libstore/parsed-derivations.cc
+++ b/src/libstore/parsed-derivations.cc
@ -93,7 +93,7 @@ StringSet ParsedDerivation::getRequiredSystemFeatures() const
    StringSet res;
    for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
        res.insert(i);
-    if (!derivationHasKnownOutputPaths(drv.type()))
+    if (!drv.type().hasKnownOutputPaths())
        res.insert("ca-derivations");
    return res;
 }