From 0abb3ad53795aa3a4792d30e5721a337f0eddfb7 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 30 Mar 2018 00:56:13 +0200 Subject: [PATCH] Allow content-addressable paths to have references This adds a command 'nix make-content-addressable' that rewrites the specified store paths into content-addressable paths. The advantage of such paths is that 1) they can be imported without signatures; 2) they can enable deduplication in cases where derivation changes do not cause output changes (apart from store path hashes). For example, $ nix make-content-addressable -r nixpkgs.cowsay rewrote '/nix/store/g1g31ah55xdia1jdqabv1imf6mcw0nb1-glibc-2.25-49' to '/nix/store/48jfj7bg78a8n4f2nhg269rgw1936vj4-glibc-2.25-49' ... rewrote '/nix/store/qbi6rzpk0bxjw8lw6azn2mc7ynnn455q-cowsay-3.03+dfsg1-16' to '/nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16' We can then copy the resulting closure to another store without signatures: $ nix copy --trusted-public-keys '' ---to ~/my-nix /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 In order to support self-references in content-addressable paths, these paths are hashed "modulo" self-references, meaning that self-references are zeroed out during hashing. Somewhat annoyingly, this means that the NAR hash stored in the Nix database is no longer necessarily equal to the output of "nix hash-path"; for content-addressable paths, you need to pass the --modulo flag: $ nix path-info --json /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 | jq -r .[].narHash sha256:0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 1ggznh07khq0hz6id09pqws3a8q9pn03ya3c03nwck1kwq8rclzs $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 --modulo iq6g2x4q62xp7y7493bibx0qn5w7xz67 0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw --- src/libstore/build.cc | 19 +------ src/libstore/local-store.cc | 21 ++++++-- src/libstore/references.cc | 62 ++++++++++++++++++++++ src/libstore/references.hh | 29 +++++++++- src/libstore/store-api.cc | 38 +++++++------ src/libstore/store-api.hh | 3 +- src/libutil/hash.cc | 20 ++----- src/libutil/hash.hh | 11 ++-- src/libutil/serialise.hh | 2 +- src/libutil/util.cc | 13 +++++ src/libutil/util.hh | 14 +++++ src/nix/command.cc | 2 +- src/nix/command.hh | 4 ++ src/nix/hash.cc | 22 +++++++- src/nix/make-content-addressable.cc | 82 +++++++++++++++++++++++++++++ src/nix/verify.cc | 12 +++-- 16 files changed, 289 insertions(+), 65 deletions(-) create mode 100644 src/nix/make-content-addressable.cc diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 0e0f8a545..68c2f2ce3 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -727,23 +727,6 @@ HookInstance::~HookInstance() ////////////////////////////////////////////////////////////////////// -typedef map StringRewrites; - - -std::string rewriteStrings(std::string s, const StringRewrites & rewrites) -{ - for (auto & i : rewrites) { - size_t j = 0; - while ((j = s.find(i.first, j)) != string::npos) - s.replace(j, i.first.size(), i.second); - } - return s; -} - - -////////////////////////////////////////////////////////////////////// - - typedef enum {rpAccept, rpDecline, rpPostpone} HookReply; class SubstitutionGoal; @@ -865,7 +848,7 @@ private: #endif /* Hash rewriting. */ - StringRewrites inputRewrites, outputRewrites; + StringMap inputRewrites, outputRewrites; typedef map RedirectedOutputs; RedirectedOutputs redirectedOutputs; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index d86f7b1d2..1280ecab5 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -5,6 +5,7 @@ #include "worker-protocol.hh" #include "derivations.hh" #include "nar-info.hh" +#include "references.hh" #include #include @@ -1009,17 +1010,21 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, /* While restoring the path from the NAR, compute the hash of the NAR. */ - HashSink hashSink(htSHA256); + std::unique_ptr hashSink; + if (info.ca == "") + hashSink = std::make_unique(htSHA256); + else + hashSink = std::make_unique(htSHA256, storePathToHash(info.path)); LambdaSource wrapperSource([&](unsigned char * data, size_t len) -> size_t { size_t n = source.read(data, len); - hashSink(data, n); + (*hashSink)(data, n); return n; }); restorePath(realPath, wrapperSource); - auto hashResult = hashSink.finish(); + auto hashResult = hashSink->finish(); if (hashResult.first != info.narHash) throw Error("hash mismatch importing path '%s';\n wanted: %s\n got: %s", @@ -1241,7 +1246,15 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) /* Check the content hash (optionally - slow). */ printMsg(lvlTalkative, format("checking contents of '%1%'") % i); - HashResult current = hashPath(info->narHash.type, toRealPath(i)); + + std::unique_ptr hashSink; + if (info->ca == "") + hashSink = std::make_unique(info->narHash.type); + else + hashSink = std::make_unique(info->narHash.type, storePathToHash(info->path)); + + dumpPath(toRealPath(i), *hashSink); + auto current = hashSink->finish(); if (info->narHash != nullHash && info->narHash != current.first) { printError(format("path '%1%' was modified! " diff --git a/src/libstore/references.cc b/src/libstore/references.cc index 0dcc264c3..605ca9815 100644 --- a/src/libstore/references.cc +++ b/src/libstore/references.cc @@ -118,4 +118,66 @@ PathSet scanForReferences(const string & path, } +RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink) + : from(from), to(to), nextSink(nextSink) +{ + assert(from.size() == to.size()); +} + +void RewritingSink::operator () (const unsigned char * data, size_t len) +{ + std::string s(prev); + s.append((const char *) data, len); + + size_t j = 0; + while ((j = s.find(from, j)) != string::npos) { + matches.push_back(pos + j); + s.replace(j, from.size(), to); + } + + prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1); + + auto consumed = s.size() - prev.size(); + + pos += consumed; + + if (consumed) nextSink((unsigned char *) s.data(), consumed); +} + +void RewritingSink::flush() +{ + if (prev.empty()) return; + pos += prev.size(); + nextSink((unsigned char *) prev.data(), prev.size()); + prev.clear(); +} + +HashModuloSink::HashModuloSink(HashType ht, const std::string & modulus) + : hashSink(ht) + , rewritingSink(modulus, std::string(modulus.size(), 0), hashSink) +{ +} + +void HashModuloSink::operator () (const unsigned char * data, size_t len) +{ + rewritingSink(data, len); +} + +HashResult HashModuloSink::finish() +{ + rewritingSink.flush(); + + /* Hash the positions of the self-references. This ensures that a + NAR with self-references and a NAR with some of the + self-references already zeroed out do not produce a hash + collision. FIXME: proof. */ + for (auto & pos : rewritingSink.matches) { + auto s = fmt("|%d", pos); + hashSink((unsigned char *) s.data(), s.size()); + } + + auto h = hashSink.finish(); + return {h.first, rewritingSink.pos}; +} + } diff --git a/src/libstore/references.hh b/src/libstore/references.hh index 013809d12..c38bdd720 100644 --- a/src/libstore/references.hh +++ b/src/libstore/references.hh @@ -7,5 +7,32 @@ namespace nix { PathSet scanForReferences(const Path & path, const PathSet & refs, HashResult & hash); - + +struct RewritingSink : Sink +{ + std::string from, to, prev; + Sink & nextSink; + uint64_t pos = 0; + + std::vector matches; + + RewritingSink(const std::string & from, const std::string & to, Sink & nextSink); + + void operator () (const unsigned char * data, size_t len) override; + + void flush(); +}; + +struct HashModuloSink : AbstractHashSink +{ + HashSink hashSink; + RewritingSink rewritingSink; + + HashModuloSink(HashType ht, const std::string & modulus); + + void operator () (const unsigned char * data, size_t len) override; + + HashResult finish() override; +}; + } diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index be13fa49a..1a1b24e3b 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -205,15 +205,27 @@ Path Store::makeOutputPath(const string & id, } -Path Store::makeFixedOutputPath(bool recursive, - const Hash & hash, const string & name) const +static std::string makeType(string && type, const PathSet & references) { - return hash.type == htSHA256 && recursive - ? makeStorePath("source", hash, name) - : makeStorePath("output:out", hashString(htSHA256, + for (auto & i : references) { + type += ":"; + type += i; + } + return type; +} + + +Path Store::makeFixedOutputPath(bool recursive, + const Hash & hash, const string & name, const PathSet & references) const +{ + if (hash.type == htSHA256 && recursive) { + return makeStorePath(makeType("source", references), hash, name); + } else { + assert(references.empty()); + return makeStorePath("output:out", hashString(htSHA256, "fixed:out:" + (recursive ? (string) "r:" : "") + - hash.to_string(Base16) + ":"), - name); + hash.to_string(Base16) + ":"), name); + } } @@ -224,12 +236,7 @@ Path Store::makeTextPath(const string & name, const Hash & hash, /* Stuff the references (if any) into the type. This is a bit hacky, but we can't put them in `s' since that would be ambiguous. */ - string type = "text"; - for (auto & i : references) { - type += ":"; - type += i; - } - return makeStorePath(type, hash, name); + return makeStorePath(makeType("text", references), hash, name); } @@ -785,8 +792,9 @@ bool ValidPathInfo::isContentAddressed(const Store & store) const else if (hasPrefix(ca, "fixed:")) { bool recursive = ca.compare(6, 2, "r:") == 0; Hash hash(std::string(ca, recursive ? 8 : 6)); - if (references.empty() && - store.makeFixedOutputPath(recursive, hash, storePathToName(path)) == path) + auto refs = references; + replaceInSet(refs, path, std::string("self")); + if (store.makeFixedOutputPath(recursive, hash, storePathToName(path), refs) == path) return true; else warn(); diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index ba8990755..250e1a4bc 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -303,7 +303,8 @@ public: const Hash & hash, const string & name) const; Path makeFixedOutputPath(bool recursive, - const Hash & hash, const string & name) const; + const Hash & hash, const string & name, + const PathSet & references = {}) const; Path makeTextPath(const string & name, const Hash & hash, const PathSet & references) const; diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index 1c14ebb18..7caee1da7 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -256,23 +256,9 @@ Hash hashString(HashType ht, const string & s) Hash hashFile(HashType ht, const Path & path) { - Ctx ctx; - Hash hash(ht); - start(ht, ctx); - - AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); - if (!fd) throw SysError(format("opening file '%1%'") % path); - - std::vector buf(8192); - ssize_t n; - while ((n = read(fd.get(), buf.data(), buf.size()))) { - checkInterrupt(); - if (n == -1) throw SysError(format("reading file '%1%'") % path); - update(ht, ctx, buf.data(), n); - } - - finish(ht, ctx, hash.hash); - return hash; + HashSink sink(ht); + readFile(path, sink); + return sink.finish().first; } diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh index 2dbc3b630..ffa43ecf5 100644 --- a/src/libutil/hash.hh +++ b/src/libutil/hash.hh @@ -111,7 +111,12 @@ string printHashType(HashType ht); union Ctx; -class HashSink : public BufferedSink +struct AbstractHashSink : virtual Sink +{ + virtual HashResult finish() = 0; +}; + +class HashSink : public BufferedSink, public AbstractHashSink { private: HashType ht; @@ -122,8 +127,8 @@ public: HashSink(HashType ht); HashSink(const HashSink & h); ~HashSink(); - void write(const unsigned char * data, size_t len); - HashResult finish(); + void write(const unsigned char * data, size_t len) override; + HashResult finish() override; HashResult currentHash(); }; diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index a344a5ac7..0120aeecb 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -24,7 +24,7 @@ struct Sink /* A buffered abstract sink. */ -struct BufferedSink : Sink +struct BufferedSink : virtual Sink { size_t bufSize, bufPos; std::unique_ptr buffer; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 6f3bf7ae8..2e416edef 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -1260,6 +1260,19 @@ string replaceStrings(const std::string & s, } +std::string rewriteStrings(const std::string & _s, const StringMap & rewrites) +{ + auto s = _s; + for (auto & i : rewrites) { + if (i.first == i.second) continue; + size_t j = 0; + while ((j = s.find(i.first, j)) != string::npos) + s.replace(j, i.first.size(), i.second); + } + return s; +} + + string statusToString(int status) { if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { diff --git a/src/libutil/util.hh b/src/libutil/util.hh index f057fdb2c..3493e80b5 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -362,6 +362,20 @@ string replaceStrings(const std::string & s, const std::string & from, const std::string & to); +std::string rewriteStrings(const std::string & s, const StringMap & rewrites); + + +/* If a set contains 'from', remove it and insert 'to'. */ +template +void replaceInSet(std::set & set, const T & from, const T & to) +{ + auto i = set.find(from); + if (i == set.end()) return; + set.erase(i); + set.insert(to); +} + + /* Convert the exit status of a child as returned by wait() into an error string. */ string statusToString(int status); diff --git a/src/nix/command.cc b/src/nix/command.cc index 3d7d582d6..532f331a7 100644 --- a/src/nix/command.cc +++ b/src/nix/command.cc @@ -129,7 +129,7 @@ void StorePathsCommand::run(ref store) } else { - for (auto & p : toStorePaths(store, NoBuild, installables)) + for (auto & p : toStorePaths(store, realiseMode, installables)) storePaths.push_back(p); if (recursive) { diff --git a/src/nix/command.hh b/src/nix/command.hh index 97a6fee7f..45ad1cd2a 100644 --- a/src/nix/command.hh +++ b/src/nix/command.hh @@ -139,6 +139,10 @@ private: bool recursive = false; bool all = false; +protected: + + RealiseMode realiseMode = NoBuild; + public: StorePathsCommand(bool recursive = false); diff --git a/src/nix/hash.cc b/src/nix/hash.cc index af4105e28..9c06e6116 100644 --- a/src/nix/hash.cc +++ b/src/nix/hash.cc @@ -2,6 +2,8 @@ #include "hash.hh" #include "legacy.hh" #include "shared.hh" +#include "references.hh" +#include "archive.hh" using namespace nix; @@ -13,6 +15,7 @@ struct CmdHash : Command bool truncate = false; HashType ht = htSHA256; std::vector paths; + std::experimental::optional modulus; CmdHash(Mode mode) : mode(mode) { @@ -23,6 +26,11 @@ struct CmdHash : Command mkFlag() .longName("type") .mkHashTypeFlag(&ht); + mkFlag() + .longName("modulo") + .description("compute hash modulo specified string") + .labels({"modulus"}) + .dest(&modulus); expectArgs("paths", &paths); } @@ -41,7 +49,19 @@ struct CmdHash : Command void run() override { for (auto path : paths) { - Hash h = mode == mFile ? hashFile(ht, path) : hashPath(ht, path).first; + + std::unique_ptr hashSink; + if (modulus) + hashSink = std::make_unique(ht, *modulus); + else + hashSink = std::make_unique(ht); + + if (mode == mFile) + readFile(path, *hashSink); + else + dumpPath(path, *hashSink); + + Hash h = hashSink->finish().first; if (truncate && h.hashSize > 20) h = compressHash(h, 20); std::cout << format("%1%\n") % h.to_string(base, base == SRI); diff --git a/src/nix/make-content-addressable.cc b/src/nix/make-content-addressable.cc new file mode 100644 index 000000000..14ed06413 --- /dev/null +++ b/src/nix/make-content-addressable.cc @@ -0,0 +1,82 @@ +#include "command.hh" +#include "store-api.hh" +#include "references.hh" + +using namespace nix; + +struct CmdMakeContentAddressable : StorePathsCommand +{ + CmdMakeContentAddressable() + { + realiseMode = Build; + } + + std::string name() override + { + return "make-content-addressable"; + } + + std::string description() override + { + return "test"; + } + + void run(ref store, Paths storePaths) override + { + auto paths = store->topoSortPaths(PathSet(storePaths.begin(), storePaths.end())); + + paths.reverse(); + + std::map remappings; + + for (auto & path : paths) { + auto oldInfo = store->queryPathInfo(path); + auto oldHashPart = storePathToHash(path); + auto name = storePathToName(path); + + StringSink sink; + store->narFromPath(path, sink); + + StringMap rewrites; + + ValidPathInfo info; + for (auto & ref : oldInfo->references) { + if (ref == path) + info.references.insert("self"); + else { + auto replacement = get(remappings, ref, ref); + // FIXME: warn about unremapped paths? + info.references.insert(replacement); + if (replacement != ref) + rewrites[storePathToHash(ref)] = storePathToHash(replacement); + } + } + + *sink.s = rewriteStrings(*sink.s, rewrites); + + HashModuloSink hashModuloSink(htSHA256, oldHashPart); + hashModuloSink((unsigned char *) sink.s->data(), sink.s->size()); + + info.narHash = hashModuloSink.finish().first; + info.narSize = sink.s->size(); + replaceInSet(info.references, path, std::string("self")); + info.path = store->makeFixedOutputPath(true, info.narHash, name, info.references); + replaceInSet(info.references, std::string("self"), info.path); + info.ca = makeFixedOutputCA(true, info.narHash); + + printError("rewrote '%s' to '%s'", path, info.path); + + auto source = sinkToSource([&](Sink & nextSink) { + RewritingSink rsink2(oldHashPart, storePathToHash(info.path), nextSink); + rsink2((unsigned char *) sink.s->data(), sink.s->size()); + rsink2.flush(); + }); + + store->addToStore(info, *source); + + remappings[path] = info.path; + } + } +}; + +static RegisterCommand r1(make_ref()); diff --git a/src/nix/verify.cc b/src/nix/verify.cc index 74d9673b6..4b0f80c62 100644 --- a/src/nix/verify.cc +++ b/src/nix/verify.cc @@ -3,6 +3,7 @@ #include "store-api.hh" #include "sync.hh" #include "thread-pool.hh" +#include "references.hh" #include @@ -88,10 +89,15 @@ struct CmdVerify : StorePathsCommand if (!noContents) { - HashSink sink(info->narHash.type); - store->narFromPath(info->path, sink); + std::unique_ptr hashSink; + if (info->ca == "") + hashSink = std::make_unique(info->narHash.type); + else + hashSink = std::make_unique(info->narHash.type, storePathToHash(info->path)); - auto hash = sink.finish(); + store->narFromPath(info->path, *hashSink); + + auto hash = hashSink->finish(); if (hash.first != info->narHash) { corrupted++;