From 3509299aca833ed50faab146f985853255041cb2 Mon Sep 17 00:00:00 2001
From: Eelco Dolstra
Date: Mon, 14 Jul 2003 10:23:11 +0000
Subject: [PATCH] * After building, scan for actual file system references as
 opposed to declared references. This prunes the reference graph, thus
 allowing better garbage collection and more efficient derivate distribution.

---
 src/Makefile.am   |   2 +-
 src/archive.cc    |   4 +-
 src/fstate.cc     |  35 ++++++++++++--
 src/fstate.hh     |   6 +--
 src/references.cc | 123 +++++++++++++++++++++++++++++++++++++++++++++++
 src/references.hh |  14 ++++++
 src/store.hh      |   6 +--
 7 files changed, 178 insertions(+), 12 deletions(-)
 create mode 100644 src/references.cc
 create mode 100644 src/references.hh

diff --git a/src/Makefile.am b/src/Makefile.am
index d8ec50f0b..3c590f4c0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,7 +20,7 @@ test_LDADD = libshared.a libnix.a -ldb_cxx-4 -lATerm
 noinst_LIBRARIES = libnix.a libshared.a
 
 libnix_a_SOURCES = util.cc hash.cc archive.cc md5.c \
- fstate.cc store.cc globals.cc db.cc
+ fstate.cc store.cc globals.cc db.cc references.cc
 
 libshared_a_SOURCES = shared.cc
 
diff --git a/src/archive.cc b/src/archive.cc
index 7e07b8a08..c9b78824e 100644
--- a/src/archive.cc
+++ b/src/archive.cc
@@ -87,7 +87,7 @@ static void dumpContents(const string & path, unsigned int size,
     writeInt(size, sink);
 
     int fd = open(path.c_str(), O_RDONLY);
-    if (fd == -1) throw SysError("opening file " + path);
+    if (fd == -1) throw SysError(format("opening file `%1%'") % path);
 
     unsigned char buf[65536];
 
@@ -112,7 +112,7 @@ static void dump(const string & path, DumpSink & sink)
 {
     struct stat st;
     if (lstat(path.c_str(), &st))
-        throw SysError("getting attributes of path " + path);
+        throw SysError(format("getting attributes of path `%1%'") % path);
 
     writeString("(", sink);
 
diff --git a/src/fstate.cc b/src/fstate.cc
index a597b6df6..36f7482ac 100644
--- a/src/fstate.cc
+++ b/src/fstate.cc
@@ -11,6 +11,7 @@
 #include "globals.hh"
 #include "store.hh"
 #include "db.hh"
+#include "references.hh"
 
 
 /* A Unix environment is a mapping from strings to strings. */
@@ -279,12 +280,15 @@ static FState realise(FState fs, StringSet & paths)
     checkPlatform(platform);
 
     /* Realise inputs. */
+    Strings inPaths;
     ATermList ins2 = ATempty;
     while (!ATisEmpty(ins)) {
-        ins2 = ATinsert(ins2, realise(ATgetFirst(ins), paths));
+        FState in = realise(ATgetFirst(ins), paths);
+        inPaths.push_back(fstatePath(in));
+        ins2 = ATinsert(ins2, in);
         ins = ATgetNext(ins);
     }
-    ins2 = ATreverse(ins2);
+    ins = ATreverse(ins2);
 
     /* Build the environment. */
     Environment env;
@@ -323,9 +327,34 @@ static FState realise(FState fs, StringSet & paths)
        values.cc. */
     registerPath(outPath, outHash);
 
+    /* Filter out inputs that are not referenced in the output. */
+    for (Strings::iterator i = inPaths.begin();
+         i != inPaths.end(); i++)
+        debug(format("in: %1%") % *i);
+
+    Strings outPaths = filterReferences(outPath, inPaths);
+
+    for (Strings::iterator i = outPaths.begin();
+         i != outPaths.end(); i++)
+        debug(format("out: %1%") % *i);
+
+    ins2 = ATempty;
+    while (!ATisEmpty(ins)) {
+        FState in = ATgetFirst(ins);
+        string path = fstatePath(in);
+        for (Strings::iterator i = outPaths.begin();
+             i != outPaths.end(); i++)
+            if (path.find(*i) != string::npos) {
+                debug(format("out2: %1%") % path);
+                ins2 = ATinsert(ins2, in);
+            }
+        ins = ATgetNext(ins);
+    }
+    ins = ATreverse(ins2);
+
     /* Register the normal form of fs. */
     FState nf = ATmake("Path(<str>, Hash(<str>), <term>)",
-        outPath.c_str(), ((string) outHash).c_str(), ins2);
+        outPath.c_str(), ((string) outHash).c_str(), ins);
     nf = storeSuccessor(fs, nf, paths);
 
     return nf;
diff --git a/src/fstate.hh b/src/fstate.hh
index 9a8955aeb..9d789c834 100644
--- a/src/fstate.hh
+++ b/src/fstate.hh
@@ -1,5 +1,5 @@
-#ifndef __EVAL_H
-#define __EVAL_H
+#ifndef __FSTATE_H
+#define __FSTATE_H
 
 #include <set>
 
@@ -97,4 +97,4 @@ Hash writeTerm(ATerm t, const string & suffix, string * p = 0);
 void registerSuccessor(const Hash & fsHash, const Hash & scHash);
 
 
-#endif /* !__EVAL_H */
+#endif /* !__FSTATE_H */
diff --git a/src/references.cc b/src/references.cc
new file mode 100644
index 000000000..de7a4b339
--- /dev/null
+++ b/src/references.cc
@@ -0,0 +1,123 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+
+#include "references.hh"
+#include "hash.hh"
+
+
+/* Move every string in `refs' that occurs as a substring of `s' to
+   `seen'.  Strings that have been found are erased from `refs', so
+   each reference is reported at most once. */
+static void search(const string & s,
+    Strings & refs, Strings & seen)
+{
+    for (Strings::iterator i = refs.begin();
+         i != refs.end(); )
+    {
+        if (s.find(*i) == string::npos)
+            i++;
+        else {
+            debug(format("found reference to `%1%'") % *i);
+            seen.push_back(*i);
+            i = refs.erase(i);
+        }
+    }
+}
+
+
+/* Recursively scan the file system object `path' for the strings in
+   `refs'.  Directory entry names, the contents of regular files and
+   the targets of symbolic links are searched; every string found is
+   moved from `refs' to `seen'.  Throws SysError if `path' cannot be
+   read, and Error if it is of any other file type. */
+void checkPath(const string & path,
+    Strings & refs, Strings & seen)
+{
+    struct stat st;
+    if (lstat(path.c_str(), &st))
+        throw SysError(format("getting attributes of path `%1%'") % path);
+
+    if (S_ISDIR(st.st_mode)) {
+        DIR * dir = opendir(path.c_str());
+        if (!dir) throw SysError(format("opening directory `%1%'") % path);
+
+        struct dirent * dirent;
+        while (errno = 0, dirent = readdir(dir)) {
+            string name = dirent->d_name;
+            if (name == "." || name == "..") continue;
+            search(name, refs, seen);
+            checkPath(path + "/" + name, refs, seen);
+        }
+
+        /* readdir() returns null both at the end of the directory and
+           on error; only errno tells the two apart. */
+        int readErr = errno;
+        closedir(dir); /* !!! close on exception */
+        if (readErr) {
+            errno = readErr;
+            throw SysError(format("reading directory `%1%'") % path);
+        }
+    }
+
+    else if (S_ISREG(st.st_mode)) {
+
+        debug(format("checking `%1%'") % path);
+
+        int fd = open(path.c_str(), O_RDONLY);
+        if (fd == -1) throw SysError(format("opening file `%1%'") % path);
+
+        char * buf = new char[st.st_size];
+
+        if (read(fd, buf, st.st_size) != st.st_size) {
+            /* Release the buffer and the descriptor, preserving the
+               errno value set by read(). */
+            int readErr = errno;
+            delete [] buf;
+            close(fd);
+            errno = readErr;
+            throw SysError(format("reading file %1%") % path);
+        }
+
+        search(string(buf, st.st_size), refs, seen);
+
+        delete [] buf; /* !!! autodelete */
+
+        close(fd); /* !!! close on exception */
+    }
+
+    else if (S_ISLNK(st.st_mode)) {
+        char buf[st.st_size];
+        if (readlink(path.c_str(), buf, st.st_size) != st.st_size)
+            throw SysError(format("reading symbolic link `%1%'") % path);
+        search(string(buf, st.st_size), refs, seen);
+    }
+
+    else throw Error(format("unknown file type: %1%") % path);
+}
+
+
+/* Return the hash parts of those paths in `_refs' that occur
+   somewhere in the file system object `path'. */
+Strings filterReferences(const string & path, const Strings & _refs)
+{
+    Strings refs;
+    Strings seen;
+
+    /* For efficiency (and a higher hit rate), just search for the
+       hash part of the file name. (This assumes that all references
+       have the form `HASH-bla'). */
+    for (Strings::const_iterator i = _refs.begin();
+         i != _refs.end(); i++)
+    {
+        string s = string(baseNameOf(*i), 0, 32);
+        parseHash(s);
+        refs.push_back(s);
+    }
+
+    checkPath(path, refs, seen);
+
+    return seen;
+}
diff --git a/src/references.hh b/src/references.hh
new file mode 100644
index 000000000..b19fbf72c
--- /dev/null
+++ b/src/references.hh
@@ -0,0 +1,14 @@
+#ifndef __REFERENCES_H
+#define __REFERENCES_H
+
+#include "util.hh"
+
+
+/* Scan the file system object `path' recursively for the paths in
+   `refs'.  Only the hash part (the first 32 characters of the base
+   name) of each path is searched for; the result holds the hash
+   parts that were found. */
+Strings filterReferences(const string & path, const Strings & refs);
+
+
+#endif /* !__REFERENCES_H */
diff --git a/src/store.hh b/src/store.hh
index 82fb2e12a..b6ed43ff6 100644
--- a/src/store.hh
+++ b/src/store.hh
@@ -1,5 +1,5 @@
-#ifndef __VALUES_H
-#define __VALUES_H
+#ifndef __STORE_H
+#define __STORE_H
 
 #include <string>
 
@@ -37,4 +37,4 @@ void addToStore(string srcPath, string & dstPath, Hash & hash,
 void deleteFromStore(const string & path);
 
 
-#endif /* !__VALUES_H */
+#endif /* !__STORE_H */