* Equivalence class consolidation.  This solves the problem that when
  we combine closures built by different users, the resulting set may
  contain multiple paths from the same output path equivalence class.

  For instance, if we do

    $ NIX_USER_ID=foo nix-env -i libXext
    $ NIX_USER_ID=root nix-env -i libXt
    $ NIX_USER_ID=foo nix-env -i libXmu

  (where libXmu depends on libXext and libXt, which both depend on
  libX11), then the following will happen:

  * User foo builds libX11 and libXext because they don't exist yet.

  * User root builds libX11 and libXt because the latter doesn't exist
    yet, while the former *does* exist but cannot be trusted.  The
    instance of libX11 built by root will almost certainly differ from
    the one built by foo, so they are stored in separate locations.

  * User foo builds libXmu, which requires libXext and libXt.  Foo has
    trusted copies of both (libXext was built by himself, while libXt
    was built by root, who is trusted by foo).  So libXmu is built with
    foo's libXext and root's libXt as inputs.

  * The resulting libXmu will link against two copies of libX11, namely
    the one used by foo's libXext and the one used by root's libXt.
    This is bad semantically (it's observable behaviour, and might well
    lead to build time or runtime failure (e.g., duplicate definitions
    of symbols)) and in terms of efficiency (the closure of libXmu
    contains two copies of libX11, so both must be deployed).

  The solution is to apply hash rewriting to "consolidate" the set of
  input paths to a build.  The invariant we wish to maintain is that
  any closure may contain at most one path from each equivalence class.
  So in the case of a collision, we select one path from each class,
  and *rewrite* all paths in that set to point only to paths in that
  set.  For instance, in the example above, we can rewrite foo's
  libXext to link against root's libX11.  That is, the hash part of
  foo's libX11 is replaced by the hash part of root's libX11.

  The hard part is to figure out which path to select from each class.
  Some selections may be cheaper than others (i.e., require fewer rewrites). The current implementation is rather dumb: it tries all possible selections, and picks the cheapest. This is an exponential time algorithm. There certainly are more efficient common-case (heuristic) approaches. But I don't know yet if there is a worst-case polynomial time algorithm.
2005-05-30 10:49:00 +00:00 · 2005-05-30 10:49:00 +00:00 · b119dd279e
parent 4f83146459
commit b119dd279e
7 changed files with 231 additions and 24 deletions
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@ -720,19 +720,6 @@ PathSet outputPaths(const DerivationOutputs & outputs)
 #endif


-string showPaths(const PathSet & paths)
-{
-    string s;
-    for (PathSet::const_iterator i = paths.begin();
-         i != paths.end(); ++i)
-    {
-        if (s.size() != 0) s += ", ";
-        s += "`" + *i + "'";
-    }
-    return s;
-}
-
-
 DerivationGoal::HookReply DerivationGoal::tryBuildHook()
 {
    return rpDecline;
@ -997,6 +984,15 @@ bool DerivationGoal::prepareBuild()
         i != drv.inputSrcs.end(); ++i)
        computeFSClosure(*i, inputPaths);

+    /* There might be equivalence class collisions now.  That is,
+       different input closures might contain different paths from the
+       *same* output path equivalence class.  We should pick one from
+       each, and rewrite dependent paths. */
+    inputPaths = consolidatePaths(inputPaths, false);
+
+    /* !!! remove, debug only */
+    consolidatePaths(inputPaths, true);
+    
    printMsg(lvlError, format("added input paths %1%") % showPaths(inputPaths)); /* !!! */

    allPaths.insert(inputPaths.begin(), inputPaths.end());
--- a/src/libstore/misc.cc
+++ b/src/libstore/misc.cc
@ -1,4 +1,5 @@
 #include "build.hh"
+#include "misc.hh"


 Derivation derivationFromPath(const Path & drvPath)
@ -49,3 +50,158 @@ Path findTrustedEqClassMember(const OutputEqClass & eqClass,

    return "";
 }
+
+
+typedef map<OutputEqClass, PathSet> ClassMap;
+typedef map<OutputEqClass, Path> FinalClassMap;
+
+
+/* Exhaustively try every way of picking exactly one path from each
+   equivalence class in the range [pos, end).
+
+   `selection' holds the paths picked so far and `unselection' the
+   paths that were passed over.  When the range is exhausted, the cost
+   of the selection is the number of selected paths for which
+   computeFSClosure() yields at least one passed-over path.  If that
+   cost is strictly lower than `bestCost', both `bestCost' and
+   `bestSelection' are updated.
+
+   !!! The number of selections tried is the product of the class
+   sizes, so this is exponential in the number of conflicting
+   classes. */
+static void findBestRewrite(const ClassMap::const_iterator & pos,
+    const ClassMap::const_iterator & end,
+    const PathSet & selection, const PathSet & unselection,
+    unsigned int & bestCost, PathSet & bestSelection)
+{
+    if (pos == end) {
+        /* Every class has been decided; score this selection. */
+        PathSet tainted;
+        for (PathSet::const_iterator chosen = selection.begin();
+             chosen != selection.end(); ++chosen)
+        {
+            PathSet closure;
+            computeFSClosure(*chosen, closure);
+            for (PathSet::const_iterator member = closure.begin();
+                 member != closure.end(); ++member)
+                if (unselection.count(*member) != 0)
+                    tainted.insert(*chosen);
+        }
+
+        printMsg(lvlError, format("cost %1% %2%") % tainted.size() % showPaths(tainted));
+
+        if (tainted.size() < bestCost) {
+            bestCost = tainted.size();
+            bestSelection = selection;
+        }
+        return;
+    }
+
+    /* Still classes left: try each member of the current class in
+       turn, marking all its siblings as passed over. */
+    const PathSet & candidates = pos->second;
+
+    ClassMap::const_iterator next = pos;
+    ++next;
+
+    for (PathSet::const_iterator pick = candidates.begin();
+         pick != candidates.end(); ++pick)
+    {
+        PathSet selection2(selection);
+        selection2.insert(*pick);
+
+        PathSet unselection2(unselection);
+        for (PathSet::const_iterator other = candidates.begin();
+             other != candidates.end(); ++other)
+        {
+            if (other == pick) continue;
+            unselection2.insert(*other);
+        }
+
+        findBestRewrite(next, end, selection2, unselection2,
+            bestCost, bestSelection);
+    }
+}
+
+
+/* Maps a store path to the path that stands in for it after
+   rewriting (which is the path itself if no rewrite was needed). */
+typedef map<Path, Path> RewriteMemo;
+
+
+/* Worker for maybeRewrite().  `done' remembers the result for every
+   path already handled during the current top-level call, so that a
+   dependency shared by several paths is examined (and, if necessary,
+   copied) only once. */
+static Path maybeRewriteMemo(const Path & path, const PathSet & selection,
+    const FinalClassMap & finalClassMap, RewriteMemo & done)
+{
+    assert(selection.find(path) != selection.end());
+
+    RewriteMemo::const_iterator known = done.find(path);
+    if (known != done.end()) return known->second;
+
+    PathSet references;
+    queryReferences(noTxn, path, references);
+
+    HashRewrites rewrites;
+
+    for (PathSet::iterator i = references.begin(); i != references.end(); ++i) {
+        if (*i == path) continue; /* ignore self-references */
+
+        /* The selected path that has to stand in for this reference. */
+        Path target;
+
+        if (selection.find(*i) != selection.end())
+            /* Already a selected path, but it may itself have to be
+               rewritten, in which case we must point to the rewritten
+               copy rather than to the original. */
+            target = *i;
+        else {
+            OutputEqClasses classes;
+            queryOutputEqClasses(noTxn, *i, classes);
+
+            /* !!! hacky; ignore sources; they are not in any eq
+               class */
+            if (classes.size() == 0) continue;
+
+            printMsg(lvlError, format("in `%1%': missing `%2%'") % path % *i);
+
+            FinalClassMap::const_iterator j = finalClassMap.find(*(classes.begin()));
+            assert(j != finalClassMap.end());
+
+            printMsg(lvlError, format("replacing with `%1%'") % j->second);
+
+            target = j->second;
+        }
+
+        Path newPath = maybeRewriteMemo(target, selection, finalClassMap, done);
+        if (*i != newPath)
+            rewrites[hashPartOf(*i)] = hashPartOf(newPath);
+    }
+
+    Path result = path;
+
+    if (rewrites.size() != 0) {
+        printMsg(lvlError, format("rewriting `%1%'") % path);
+
+        result = addToStore(path,
+            hashPartOf(path), namePartOf(path),
+            references, rewrites);
+
+        printMsg(lvlError, format("rewrote `%1%' to `%2%'") % path % result);
+    }
+
+    done[path] = result;
+    return result;
+}
+
+
+/* Return a path equivalent to `path' whose references (other than
+   itself and sources) all resolve to members of `selection', or to
+   rewritten copies of such members.
+
+   `path' must be a member of `selection'.  `finalClassMap' gives, for
+   each equivalence class, the selected member; it must have an entry
+   for the first class of every non-selected reference that is
+   encountered.
+
+   Every non-self reference is processed recursively, including
+   references that are already in `selection': such a reference may
+   itself need rewriting, and then `path' has to be rewritten as well
+   so that it points to the new copy.  If no reference changes, `path'
+   is returned as is; otherwise a copy with the hash parts replaced is
+   added to the store via addToStore() and that copy's path is
+   returned. */
+static Path maybeRewrite(const Path & path, const PathSet & selection,
+    const FinalClassMap & finalClassMap)
+{
+    RewriteMemo done;
+    return maybeRewriteMemo(path, selection, finalClassMap, done);
+}
+
+
+/* Ensure that `paths' contains at most one member of each output path
+   equivalence class.
+
+   If no equivalence class has two or more members in `paths', the set
+   is returned unchanged.  Otherwise one member is selected from each
+   class (the selection with the lowest cost according to
+   findBestRewrite()), each selected path is passed through
+   maybeRewrite(), and the resulting paths are returned together with
+   every input path that is not in any equivalence class (sources).
+
+   If `checkOnly' is true, the caller asserts that `paths' is already
+   free of conflicts; a conflict then trips an assertion. */
+PathSet consolidatePaths(const PathSet & paths, bool checkOnly)
+{
+    printMsg(lvlError, format("consolidating"));
+
+    ClassMap classMap;
+
+    /* Paths that are not in any equivalence class.  They can never
+       take part in a collision, but they still belong in the
+       result. */
+    PathSet sources;
+
+    for (PathSet::const_iterator i = paths.begin(); i != paths.end(); ++i) {
+        OutputEqClasses classes;
+        queryOutputEqClasses(noTxn, *i, classes);
+
+        if (classes.size() == 0) {
+            sources.insert(*i);
+            continue;
+        }
+
+        for (OutputEqClasses::iterator j = classes.begin(); j != classes.end(); ++j) {
+            classMap[*j].insert(*i);
+        }
+    }
+
+    bool conflict = false;
+    for (ClassMap::iterator i = classMap.begin(); i != classMap.end(); ++i)
+        if (i->second.size() >= 2) {
+            printMsg(lvlError, format("conflict in eq class `%1%'") % i->first);
+            conflict = true;
+        }
+
+    if (!conflict) return paths;
+
+    assert(!checkOnly);
+
+    /* !!! exponential-time algorithm! */
+    const unsigned int infinity = 1000000;
+    unsigned int bestCost = infinity;
+    PathSet bestSelection;
+    findBestRewrite(classMap.begin(), classMap.end(),
+        PathSet(), PathSet(), bestCost, bestSelection);
+
+    assert(bestCost != infinity);
+
+    printMsg(lvlError, format("cheapest selection %1% %2%")
+        % bestCost % showPaths(bestSelection));
+
+    /* Record which member was selected for each class. */
+    FinalClassMap finalClassMap;
+    for (ClassMap::iterator i = classMap.begin(); i != classMap.end(); ++i)
+        for (PathSet::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
+            if (bestSelection.find(*j) != bestSelection.end())
+                finalClassMap[i->first] = *j;
+
+    /* Start from the sources, so that they are not lost, and add the
+       (possibly rewritten) selected paths. */
+    PathSet newPaths(sources);
+    for (PathSet::iterator i = bestSelection.begin();
+         i != bestSelection.end(); ++i)
+        newPaths.insert(maybeRewrite(*i, bestSelection, finalClassMap));
+
+    return newPaths;
+}
--- a/src/libstore/misc.hh
+++ b/src/libstore/misc.hh
@ -33,4 +33,7 @@ Path findTrustedEqClassMember(const OutputEqClass & eqClass,
    const TrustId & trustId);


+PathSet consolidatePaths(const PathSet & paths, bool checkOnly);
+
+
 #endif /* !__MISC_H */
--- a/src/libstore/store.cc
+++ b/src/libstore/store.cc
@ -45,10 +45,12 @@ static TableId dbReferers = 0;
   class; i.e., the extension of an extension class. */
 static TableId dbEquivalences = 0;

-/* dbEquivalenceClass :: Path -> OutputEqClass
+/* dbEquivalenceClasses :: Path -> [OutputEqClass]

-   Lists for each output path the extension class that it is in. */
-static TableId dbEquivalenceClass = 0;
+   !!! should be [(TrustId, OutputEqClass)] ?
+
+   Lists for each output path the extension classes that it is in. */
+static TableId dbEquivalenceClasses = 0;


 #if 0
@ -108,7 +110,7 @@ void openDB()
    dbDerivers = nixDB.openTable("derivers");
 #endif
    dbEquivalences = nixDB.openTable("equivalences");
-    dbEquivalenceClass = nixDB.openTable("equivalence-class");
+    dbEquivalenceClasses = nixDB.openTable("equivalence-classes");

    int curSchema = 0;
    Path schemaFN = nixDBPath + "/schema";
@ -476,6 +478,14 @@ void addOutputEqMember(const Transaction & txn,
    }

    nixDB.setStrings(txn, dbEquivalences, eqClass, ss);
+
+    OutputEqClasses classes;
+    queryOutputEqClasses(txn, path, classes);
+
+    classes.insert(eqClass);
+
+    nixDB.setStrings(txn, dbEquivalenceClasses, path,
+        Strings(classes.begin(), classes.end()));
 }


@ -497,6 +507,15 @@ void queryOutputEqMembers(const Transaction & txn,
 }


+/* Look up `path' in the dbEquivalenceClasses table and add every
+   equivalence class stored for it to `classes'.  `classes' is not
+   cleared first, so existing elements are kept. */
+void queryOutputEqClasses(const Transaction & txn,
+    const Path & path, OutputEqClasses & classes)
+{
+    Strings stored;
+    nixDB.queryStrings(txn, dbEquivalenceClasses, path, stored);
+
+    for (Strings::const_iterator eqClass = stored.begin();
+         eqClass != stored.end(); ++eqClass)
+        classes.insert(*eqClass);
+}
+
+
 #if 0
 void setDeriver(const Transaction & txn, const Path & storePath,
    const Path & deriver)
@ -789,6 +808,7 @@ string rewriteHashes(string s, const HashRewrites & rewrites,
            debug(format("rewriting @ %1%") % j);
            positions.push_back(j);
            s.replace(j, to.size(), to);
+            j += to.size();
        }
    }

@ -828,6 +848,16 @@ static Hash hashModulo(string s, const PathHash & modulus)
 }


+/* Apply the hash rewrites in `rewrites' to each path in `references'
+   and return the set of resulting paths. */
+static PathSet rewriteReferences(const PathSet & references,
+    const HashRewrites & rewrites)
+{
+    PathSet rewritten;
+
+    PathSet::const_iterator ref = references.begin();
+    while (ref != references.end()) {
+        rewritten.insert(rewriteHashes(*ref, rewrites));
+        ++ref;
+    }
+
+    return rewritten;
+}
+
+
 static Path _addToStore(const string & suffix, string dump,
    const PathHash & selfHash, const PathSet & references)
 {
@ -875,9 +905,7 @@ static Path _addToStore(const string & suffix, string dump,
            /* Set the references for the new path.  Of course, any
               hash rewrites have to be applied to the references,
               too. */
-            PathSet references2;
-            for (PathSet::iterator i = references.begin(); i != references.end(); ++i)
-                references2.insert(rewriteHashes(*i, rewrites));
+            PathSet references2 = rewriteReferences(references, rewrites);
            
            Transaction txn(nixDB);
            registerValidPath(txn, dstPath, contentHash, references2, "");
@ -892,7 +920,7 @@ static Path _addToStore(const string & suffix, string dump,


 Path addToStore(const Path & _srcPath, const PathHash & selfHash,
-    const string & suffix, const PathSet & references)
+    const string & suffix, const PathSet & references, const HashRewrites & rewrites)
 {
    Path srcPath(absPath(_srcPath));
    debug(format("adding `%1%' to the store") % srcPath);
@ -903,8 +931,11 @@ Path addToStore(const Path & _srcPath, const PathHash & selfHash,
        dumpPath(srcPath, sink);
    }

+    if (rewrites.size() != 0) sink.s = rewriteHashes(sink.s, rewrites);
+
    return _addToStore(suffix == "" ? baseNameOf(srcPath) : suffix,
-        sink.s, selfHash, references);
+        sink.s, selfHash,
+        rewriteReferences(references, rewrites));
 }


--- a/src/libstore/store.hh
+++ b/src/libstore/store.hh
@ -74,6 +74,8 @@ typedef string TrustId;
   substituted with concrete paths when we actually build. */
 typedef Path OutputEqClass;

+typedef set<OutputEqClass> OutputEqClasses;
+

 /* A member of an output path equivalence class, i.e., a store path
   that has been produced by a certain derivation. */
@ -193,7 +195,10 @@ void addOutputEqMember(const Transaction & txn,

 void queryOutputEqMembers(const Transaction & txn,
    const OutputEqClass & eqClass, OutputEqMembers & members);
-    
+
+void queryOutputEqClasses(const Transaction & txn,
+    const Path & path, OutputEqClasses & classes);
+
 #if 0
 /* Sets the deriver of a store path.  Use with care! */
 void setDeriver(const Transaction & txn, const Path & storePath,
@ -226,7 +231,8 @@ string rewriteHashes(const string & s, const HashRewrites & rewrites);
 /* Copy the contents of a path to the store and register the validity
   the resulting path.  The resulting path is returned. */
 Path addToStore(const Path & srcPath, const PathHash & selfHash = PathHash(),
-    const string & suffix = "", const PathSet & references = PathSet());
+    const string & suffix = "", const PathSet & references = PathSet(),
+    const HashRewrites & rewrites = HashRewrites());

 #if 0
 /* Like addToStore(), but for pre-adding the outputs of fixed-output
--- a/src/libutil/util.cc
+++ b/src/libutil/util.cc
@ -357,6 +357,19 @@ void printMsg_(Verbosity level, const format & f)
 }


+/* Render a set of paths as a single string for messages: each path is
+   wrapped in `...' quotes and consecutive paths are separated by
+   ", ".  An empty set yields the empty string. */
+string showPaths(const PathSet & paths)
+{
+    string rendered;
+    bool first = true;
+
+    for (PathSet::const_iterator p = paths.begin(); p != paths.end(); ++p) {
+        if (!first) rendered += ", ";
+        first = false;
+
+        rendered += "`";
+        rendered += *p;
+        rendered += "'";
+    }
+
+    return rendered;
+}
+
+
 void readFull(int fd, unsigned char * buf, size_t count)
 {
    while (count) {
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@ -173,6 +173,8 @@ void printMsg_(Verbosity level, const format & f);

 #define debug(f) printMsg(lvlDebug, f)

+string showPaths(const PathSet & paths);
+

 /* Wrappers arount read()/write() that read/write exactly the
   requested number of bytes. */