* Equivalence class consolidation.  This solves the problem that when
  we combine closures built by different users, the resulting set may
  contain multiple paths from the same output path equivalence class.

  For instance, if we do

    $ NIX_USER_ID=foo nix-env -i libXext
    $ NIX_USER_ID=root nix-env -i libXt
    $ NIX_USER_ID=foo nix-env -i libXmu

  (where libXmu depends on libXext and libXt, which both depend on
  libX11), then the following will happen:

    * User foo builds libX11 and libXext because they don't exist
      yet.
      
    * User root builds libX11 and libXt because the latter doesn't
      exist yet, while the former *does* exist but cannot be trusted.
      The instance of libX11 built by root will almost certainly
      differ from the one built by foo, so they are stored in separate
      locations.
      
    * User foo builds libXmu, which requires libXext and libXt.  Foo
      has trusted copies of both (libXext was built by himself, while
      libXt was built by root, who is trusted by foo).  So libXmu is
      built with foo's libXext and root's libXt as inputs.

    * The resulting libXmu will link against two copies of libX11,
      namely the one used by foo's libXext and the one used by root's
      libXt.  This is bad semantically (it's observable behaviour, and
      might well lead to build time or runtime failure (e.g.,
      duplicate definitions of symbols)) and in terms of efficiency
      (the closure of libXmu contains two copies of libX11, so both
      must be deployed).

  The solution is to apply hash rewriting to "consolidate" the set of
  input paths to a build.  The invariant we wish to maintain is that
  any closure may contain at most one path from each equivalence
  class.
  
  So in the case of a collision, we select one path from each class,
  and *rewrite* all paths in that set to point only to paths in that
  set.  For instance, in the example above, we can rewrite foo's
  libXext to link against root's libX11.  That is, the hash part of
  foo's libX11 is replaced by the hash part of root's libX11.

  The hard part is to figure out which path to select from each
  class.  Some selections may be cheaper than others (i.e., require
  fewer rewrites).  The current implementation is rather dumb: it
  tries all possible selections, and picks the cheapest.  This is an
  exponential time algorithm.

  There certainly are more efficient common-case (heuristic)
  approaches.  But I don't know yet if there is a worst-case
  polynomial time algorithm.
This commit is contained in:
Eelco Dolstra 2005-05-30 10:49:00 +00:00
parent 4f83146459
commit b119dd279e
7 changed files with 231 additions and 24 deletions

View file

@ -720,19 +720,6 @@ PathSet outputPaths(const DerivationOutputs & outputs)
#endif
/* Format a set of paths for display: each path is wrapped in `...'
   quotes and consecutive paths are separated by ", ".  An empty set
   yields the empty string. */
string showPaths(const PathSet & paths)
{
    string result;
    PathSet::const_iterator cur = paths.begin();
    while (cur != paths.end()) {
        /* Emit the separator before every element but the first. */
        if (!result.empty()) result += ", ";
        result += "`";
        result += *cur;
        result += "'";
        ++cur;
    }
    return result;
}
DerivationGoal::HookReply DerivationGoal::tryBuildHook()
{
return rpDecline;
@ -997,6 +984,15 @@ bool DerivationGoal::prepareBuild()
i != drv.inputSrcs.end(); ++i)
computeFSClosure(*i, inputPaths);
/* There might be equivalence class collisions now. That is,
different input closures might contain different paths from the
*same* output path equivalence class. We should pick one from
each, and rewrite dependent paths. */
inputPaths = consolidatePaths(inputPaths, false);
/* !!! remove, debug only */
consolidatePaths(inputPaths, true);
printMsg(lvlError, format("added input paths %1%") % showPaths(inputPaths)); /* !!! */
allPaths.insert(inputPaths.begin(), inputPaths.end());

View file

@ -1,4 +1,5 @@
#include "build.hh"
#include "misc.hh"
Derivation derivationFromPath(const Path & drvPath)
@ -49,3 +50,158 @@ Path findTrustedEqClassMember(const OutputEqClass & eqClass,
return "";
}
/* Maps each output path equivalence class to the set of paths (taken
   from the set being consolidated) that are members of that class. */
typedef map<OutputEqClass, PathSet> ClassMap;
/* Maps each output path equivalence class to the single member path
   that was selected to represent it. */
typedef map<OutputEqClass, Path> FinalClassMap;
/* Exhaustively enumerate every way of picking one member path per
   equivalence class, and remember the cheapest pick seen so far.

   [pos, end) are the classes that still have to be decided.
   `selection' holds the members picked for the classes already
   visited; `unselection' holds the members of those classes that were
   passed over.  When all classes have been decided, the cost of the
   selection is the number of selected paths whose closure (as
   computed by computeFSClosure()) contains an unselected path.  If
   that cost is strictly lower than `bestCost', both `bestCost' and
   `bestSelection' are updated.

   !!! This visits every combination, i.e., it takes time exponential
   in the number of classes. */
static void findBestRewrite(const ClassMap::const_iterator & pos,
    const ClassMap::const_iterator & end,
    const PathSet & selection, const PathSet & unselection,
    unsigned int & bestCost, PathSet & bestSelection)
{
    if (pos == end) {
        /* Every class has been decided: evaluate this selection. */
        PathSet tainted;
        for (PathSet::const_iterator sel = selection.begin();
             sel != selection.end(); ++sel)
        {
            PathSet reachable;
            computeFSClosure(*sel, reachable);
            for (PathSet::const_iterator r = reachable.begin();
                 r != reachable.end(); ++r)
                if (unselection.count(*r) != 0)
                    tainted.insert(*sel);
        }

        printMsg(lvlError, format("cost %1% %2%") % tainted.size() % showPaths(tainted));

        if (tainted.size() < bestCost) {
            bestCost = tainted.size();
            bestSelection = selection;
        }
        return;
    }

    /* Try each member of the current class in turn as the selected
       one; all its siblings become unselected. */
    const PathSet & members = pos->second;
    ClassMap::const_iterator next = pos;
    ++next;

    for (PathSet::const_iterator chosen = members.begin();
         chosen != members.end(); ++chosen)
    {
        PathSet selection2(selection);
        selection2.insert(*chosen);

        PathSet unselection2(unselection);
        for (PathSet::const_iterator other = members.begin();
             other != members.end(); ++other)
            if (other != chosen) unselection2.insert(*other);

        findBestRewrite(next, end, selection2, unselection2,
            bestCost, bestSelection);
    }
}
/* Return a store path equivalent to `path' in which every reference
   to a path outside `selection' has been redirected to the selected
   member of the same equivalence class.

   `path' must itself be a member of `selection' (asserted).
   `finalClassMap' gives, for each equivalence class, the member that
   was selected for it.

   For each reference of `path' that is not in `selection' and that is
   registered in at least one equivalence class, the selected member
   of the first of its classes is looked up (it must exist; asserted),
   recursively rewritten, and a hash rewrite from the old reference to
   the result is recorded.  If no rewrites were recorded, `path' is
   returned unchanged; otherwise `path' is copied into the store via
   addToStore() with those rewrites applied and the new path is
   returned.

   NOTE(review): references that are already in `selection' are not
   visited here, so any rewriting that they themselves require is not
   reflected in the result -- confirm that this is intended. */
static Path maybeRewrite(const Path & path, const PathSet & selection,
    const FinalClassMap & finalClassMap)
{
    assert(selection.find(path) != selection.end());

    PathSet references;
    queryReferences(noTxn, path, references);

    HashRewrites rewrites;

    for (PathSet::iterator i = references.begin(); i != references.end(); ++i) {
        if (*i == path) continue; /* ignore self-references */
        if (selection.find(*i) != selection.end()) continue; /* already a selected path */

        OutputEqClasses classes;
        queryOutputEqClasses(noTxn, *i, classes);
        /* !!! hacky; ignore sources; they are not in any eq class */
        if (classes.empty()) continue;

        printMsg(lvlError, format("in `%1%': missing `%2%'") % path % *i);

        /* Only the first class of the reference is consulted. */
        FinalClassMap::const_iterator j = finalClassMap.find(*(classes.begin()));
        assert(j != finalClassMap.end());

        printMsg(lvlError, format("replacing with `%1%'") % j->second);

        /* The replacement may itself need rewriting first. */
        Path newPath = maybeRewrite(j->second, selection, finalClassMap);
        if (*i != newPath)
            rewrites[hashPartOf(*i)] = hashPartOf(newPath);
    }

    if (rewrites.size() == 0) return path;

    printMsg(lvlError, format("rewriting `%1%'") % path);

    Path newPath = addToStore(path,
        hashPartOf(path), namePartOf(path),
        references, rewrites);

    printMsg(lvlError, format("rewrote `%1%' to `%2%'") % path % newPath);

    return newPath;
}
/* Ensure that `paths' contains at most one member of each output path
   equivalence class.

   The paths are first grouped by the equivalence classes they are
   registered in.  If no class has two or more members, `paths' is
   returned unchanged.  Otherwise `checkOnly' must be false
   (asserted); the cheapest selection of one member per class is
   determined with findBestRewrite(), and the result is the set
   obtained by passing each selected path through maybeRewrite(). */
PathSet consolidatePaths(const PathSet & paths, bool checkOnly)
{
    printMsg(lvlError, format("consolidating"));

    /* Bucket every input path under each class it belongs to. */
    ClassMap membersByClass;
    for (PathSet::const_iterator p = paths.begin(); p != paths.end(); ++p) {
        OutputEqClasses pathClasses;
        queryOutputEqClasses(noTxn, *p, pathClasses);
        /* !!! deal with sources */
        for (OutputEqClasses::iterator c = pathClasses.begin();
             c != pathClasses.end(); ++c)
            membersByClass[*c].insert(*p);
    }

    /* Report every class that has more than one member. */
    unsigned int nrConflicts = 0;
    for (ClassMap::iterator c = membersByClass.begin();
         c != membersByClass.end(); ++c)
    {
        if (c->second.size() < 2) continue;
        printMsg(lvlError, format("conflict in eq class `%1%'") % c->first);
        ++nrConflicts;
    }

    if (nrConflicts == 0) return paths;

    assert(!checkOnly);

    /* !!! exponential-time algorithm! */
    const unsigned int infinity = 1000000;
    unsigned int bestCost = infinity;
    PathSet bestSelection;
    findBestRewrite(membersByClass.begin(), membersByClass.end(),
        PathSet(), PathSet(), bestCost, bestSelection);

    assert(bestCost != infinity);

    printMsg(lvlError, format("cheapest selection %1% %2%")
        % bestCost % showPaths(bestSelection));

    /* Record, per class, which member ended up in the selection. */
    FinalClassMap chosenMember;
    for (ClassMap::iterator c = membersByClass.begin();
         c != membersByClass.end(); ++c)
    {
        const PathSet & members = c->second;
        for (PathSet::const_iterator m = members.begin();
             m != members.end(); ++m)
            if (bestSelection.count(*m) != 0)
                chosenMember[c->first] = *m;
    }

    /* Rewrite the selected paths so that they only refer to each
       other. */
    PathSet consolidated;
    for (PathSet::iterator s = bestSelection.begin();
         s != bestSelection.end(); ++s)
        consolidated.insert(maybeRewrite(*s, bestSelection, chosenMember));

    return consolidated;
}

View file

@ -33,4 +33,7 @@ Path findTrustedEqClassMember(const OutputEqClass & eqClass,
const TrustId & trustId);
/* Consolidate a set of paths so that it holds at most one member of
   each output path equivalence class.  If no class has more than one
   member in `paths', the set is returned unchanged.  Otherwise
   `checkOnly' must be false (this is asserted), one member per class
   is selected, and the selected paths are rewritten where necessary;
   the resulting set of paths is returned. */
PathSet consolidatePaths(const PathSet & paths, bool checkOnly);
#endif /* !__MISC_H */

View file

@ -45,10 +45,12 @@ static TableId dbReferers = 0;
class; i.e., the extension of an extension class. */
static TableId dbEquivalences = 0;
/* dbEquivalenceClass :: Path -> OutputEqClass
/* dbEquivalenceClasses :: Path -> [OutputEqClass]
Lists for each output path the extension class that it is in. */
static TableId dbEquivalenceClass = 0;
!!! should be [(TrustId, OutputEqClass)] ?
Lists for each output path the extension classes that it is in. */
static TableId dbEquivalenceClasses = 0;
#if 0
@ -108,7 +110,7 @@ void openDB()
dbDerivers = nixDB.openTable("derivers");
#endif
dbEquivalences = nixDB.openTable("equivalences");
dbEquivalenceClass = nixDB.openTable("equivalence-class");
dbEquivalenceClasses = nixDB.openTable("equivalence-classes");
int curSchema = 0;
Path schemaFN = nixDBPath + "/schema";
@ -476,6 +478,14 @@ void addOutputEqMember(const Transaction & txn,
}
nixDB.setStrings(txn, dbEquivalences, eqClass, ss);
OutputEqClasses classes;
queryOutputEqClasses(txn, path, classes);
classes.insert(eqClass);
nixDB.setStrings(txn, dbEquivalenceClasses, path,
Strings(classes.begin(), classes.end()));
}
@ -497,6 +507,15 @@ void queryOutputEqMembers(const Transaction & txn,
}
void queryOutputEqClasses(const Transaction & txn,
const Path & path, OutputEqClasses & classes)
{
Strings ss;
nixDB.queryStrings(txn, dbEquivalenceClasses, path, ss);
classes.insert(ss.begin(), ss.end());
}
#if 0
void setDeriver(const Transaction & txn, const Path & storePath,
const Path & deriver)
@ -789,6 +808,7 @@ string rewriteHashes(string s, const HashRewrites & rewrites,
debug(format("rewriting @ %1%") % j);
positions.push_back(j);
s.replace(j, to.size(), to);
j += to.size();
}
}
@ -828,6 +848,16 @@ static Hash hashModulo(string s, const PathHash & modulus)
}
/* Apply the hash rewrites in `rewrites' to every path in `references'
   and return the set of rewritten paths. */
static PathSet rewriteReferences(const PathSet & references,
    const HashRewrites & rewrites)
{
    PathSet rewritten;
    PathSet::const_iterator ref = references.begin();
    while (ref != references.end()) {
        rewritten.insert(rewriteHashes(*ref, rewrites));
        ++ref;
    }
    return rewritten;
}
static Path _addToStore(const string & suffix, string dump,
const PathHash & selfHash, const PathSet & references)
{
@ -875,9 +905,7 @@ static Path _addToStore(const string & suffix, string dump,
/* Set the references for the new path. Of course, any
hash rewrites have to be applied to the references,
too. */
PathSet references2;
for (PathSet::iterator i = references.begin(); i != references.end(); ++i)
references2.insert(rewriteHashes(*i, rewrites));
PathSet references2 = rewriteReferences(references, rewrites);
Transaction txn(nixDB);
registerValidPath(txn, dstPath, contentHash, references2, "");
@ -892,7 +920,7 @@ static Path _addToStore(const string & suffix, string dump,
Path addToStore(const Path & _srcPath, const PathHash & selfHash,
const string & suffix, const PathSet & references)
const string & suffix, const PathSet & references, const HashRewrites & rewrites)
{
Path srcPath(absPath(_srcPath));
debug(format("adding `%1%' to the store") % srcPath);
@ -903,8 +931,11 @@ Path addToStore(const Path & _srcPath, const PathHash & selfHash,
dumpPath(srcPath, sink);
}
if (rewrites.size() != 0) sink.s = rewriteHashes(sink.s, rewrites);
return _addToStore(suffix == "" ? baseNameOf(srcPath) : suffix,
sink.s, selfHash, references);
sink.s, selfHash,
rewriteReferences(references, rewrites));
}

View file

@ -74,6 +74,8 @@ typedef string TrustId;
substituted with concrete paths when we actually build. */
typedef Path OutputEqClass;
/* A set of output path equivalence classes. */
typedef set<OutputEqClass> OutputEqClasses;
/* A member of an output path equivalence class, i.e., a store path
that has been produced by a certain derivation. */
@ -193,7 +195,10 @@ void addOutputEqMember(const Transaction & txn,
void queryOutputEqMembers(const Transaction & txn,
const OutputEqClass & eqClass, OutputEqMembers & members);
/* Adds to `classes' the output path equivalence classes that are
   recorded for `path'.  Existing elements of `classes' are kept. */
void queryOutputEqClasses(const Transaction & txn,
const Path & path, OutputEqClasses & classes);
#if 0
/* Sets the deriver of a store path. Use with care! */
void setDeriver(const Transaction & txn, const Path & storePath,
@ -226,7 +231,8 @@ string rewriteHashes(const string & s, const HashRewrites & rewrites);
/* Copy the contents of a path to the store and register the validity
the resulting path. The resulting path is returned. */
Path addToStore(const Path & srcPath, const PathHash & selfHash = PathHash(),
const string & suffix = "", const PathSet & references = PathSet());
const string & suffix = "", const PathSet & references = PathSet(),
const HashRewrites & rewrites = HashRewrites());
#if 0
/* Like addToStore(), but for pre-adding the outputs of fixed-output

View file

@ -357,6 +357,19 @@ void printMsg_(Verbosity level, const format & f)
}
/* Produce a human-readable listing of `paths': every path is quoted
   as `path' and the quoted paths are joined with ", ".  Returns the
   empty string for an empty set. */
string showPaths(const PathSet & paths)
{
    string joined;
    for (PathSet::const_iterator p = paths.begin(); p != paths.end(); ++p) {
        const string quoted = "`" + *p + "'";
        if (joined.empty())
            joined = quoted;
        else
            joined += ", " + quoted;
    }
    return joined;
}
void readFull(int fd, unsigned char * buf, size_t count)
{
while (count) {

View file

@ -173,6 +173,8 @@ void printMsg_(Verbosity level, const format & f);
#define debug(f) printMsg(lvlDebug, f)
/* Returns the paths in `paths' as a single string, each path quoted
   as `path' and separated from the next by ", ". */
string showPaths(const PathSet & paths);
/* Wrappers around read()/write() that read/write exactly the
requested number of bytes. */