From 73c53935d00660301e9408beabf1c80d6ef48610 Mon Sep 17 00:00:00 2001
From: Eelco Dolstra <e.dolstra@tudelft.nl>
Date: Mon, 24 Mar 2003 17:49:56 +0000
Subject: [PATCH] * For efficiency: md5 integrated into nix. * Command `nix
 ensure' which is like `nix getpkg' except that if the   hash refers to a run
 action it will just ensure that the imports are   there. * Command `nix
 closure' to print out the closure of the set of   descriptors under the
 import relation, starting at a set of roots.   This can be used for garbage
 collection (e.g., given a list of   `activated' packages, we can delete all
 packages not reachable from   those). * Command `nix graph' to print out a
 Dot graph of the dependency   graph. * `nix-addroot' adds a root for the
 (unimplemented) garbage collector.

---
 src/Makefile    |  12 +-
 src/md5.c       | 435 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/md5.h       | 151 +++++++++++++++++
 src/nix-addroot |  18 ++
 src/nix.cc      | 260 ++++++++++++++++++++++-------
 5 files changed, 810 insertions(+), 66 deletions(-)
 create mode 100644 src/md5.c
 create mode 100644 src/md5.h
 create mode 100755 src/nix-addroot

diff --git a/src/Makefile b/src/Makefile
index 6b4c792bb..237257275 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,8 +2,16 @@ all: nix nix-instantiate
 
 SYSTEM = $(shell ./config.guess)
 
-nix: nix.cc
-	g++ -g -Wall -o nix nix.cc -ldb_cxx-4 -DSYSTEM=\"$(SYSTEM)\"
+nix: nix.o md5.o
+	g++ -g -o $@ $^ -ldb_cxx-4
+
+%.o: %.cc
+	g++ -g -Wall -o $@ -c $< -DSYSTEM=\"$(SYSTEM)\"
+
+%.o: %.c
+	gcc -g -Wall -o $@ -c $< -DSYSTEM=\"$(SYSTEM)\"
+
+nix.o md5.o: md5.h  # both include md5.h; the source prerequisite comes from the pattern rules
 
 nix-instantiate: nix-instantiate.in
 	sed "s/@SYSTEM@/$(SYSTEM)/" < $^ > $@
diff --git a/src/md5.c b/src/md5.c
new file mode 100644
index 000000000..64ade3c6f
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,435 @@
+/* Functions to compute MD5 message digest of files or memory blocks.
+   according to the definition of MD5 in RFC 1321 from April 1992.
+   Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "md5.h"
+
+#ifdef WORDS_BIGENDIAN
+# define SWAP(n)							\
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+#else
+# define SWAP(n) (n)
+#endif
+
+
+/* This array contains the bytes used to pad the buffer to the next
+   64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
+static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
+
+
+/* Initialize CTX for a new digest computation: load the four chaining
+   values and clear the byte counter and the pending-data length.
+   (RFC 1321, 3.3: Step 3)  */
+void
+md5_init_ctx (struct md5_ctx *ctx)
+{
+  ctx->A = 0x67452301;
+  ctx->B = 0xefcdab89;
+  ctx->C = 0x98badcfe;
+  ctx->D = 0x10325476;
+
+  ctx->total[0] = ctx->total[1] = 0;
+  ctx->buflen = 0;
+}
+
+/* Put result from CTX in first 16 bytes following RESBUF.  The result
+   must be in little endian byte order, so each state word goes through
+   SWAP before it is stored.  CTX itself is not modified; RESBUF is
+   returned.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+void *
+md5_read_ctx (const struct md5_ctx *ctx, void *resbuf)
+{
+  ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
+  ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
+  ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
+  ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
+
+  return resbuf;
+}
+
+/* Process the remaining bytes in the internal buffer and the usual
+   prolog according to the standard and write the result to RESBUF:
+   count the pending bytes, pad to 56 mod 64 with FILLBUF, append the
+   message length in bits and digest the final block(s).
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+void *
+md5_finish_ctx (struct md5_ctx *ctx, void *resbuf)
+{
+  /* Take yet unprocessed bytes into account.  */
+  md5_uint32 bytes = ctx->buflen;
+  size_t pad;
+
+  /* Now count remaining bytes.  */
+  ctx->total[0] += bytes;
+  if (ctx->total[0] < bytes)
+    ++ctx->total[1];
+
+  pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
+  memcpy (&ctx->buffer[bytes], fillbuf, pad);
+
+  /* Put the 64-bit file length in *bits* at the end of the buffer.  */
+  *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP (ctx->total[0] << 3);
+  *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP ((ctx->total[1] << 3) |
+							(ctx->total[0] >> 29));
+
+  /* Process last bytes.  */
+  md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
+
+  return md5_read_ctx (ctx, resbuf);
+}
+
+/* Compute MD5 message digest for bytes read from STREAM.  The
+   resulting message digest number will be written into the 16 bytes
+   beginning at RESBLOCK.  Returns 0 on success, or 1 if reading
+   STREAM failed (RESBLOCK is then left unwritten).  STREAM is read
+   to end of file but not closed.  */
+int
+md5_stream (FILE *stream, void *resblock)
+{
+  /* Important: BLOCKSIZE must be a multiple of 64.  */
+#define BLOCKSIZE 4096
+  struct md5_ctx ctx;
+  char buffer[BLOCKSIZE + 72];
+  size_t sum;
+
+  /* Initialize the computation context.  */
+  md5_init_ctx (&ctx);
+
+  /* Iterate over full file contents.  */
+  while (1)
+    {
+      /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
+	 computation function processes the whole buffer so that with the
+	 next round of the loop another block can be read.  */
+      size_t n;
+      sum = 0;
+
+      /* Read block.  Take care for partial reads.  */
+      do
+	{
+	  n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
+
+	  sum += n;
+	}
+      while (sum < BLOCKSIZE && n != 0);
+      if (n == 0 && ferror (stream))
+        return 1;
+
+      /* If end of file is reached, end the loop.  */
+      if (n == 0)
+	break;
+
+      /* Process buffer with BLOCKSIZE bytes.  Note that
+			BLOCKSIZE % 64 == 0
+       */
+      md5_process_block (buffer, BLOCKSIZE, &ctx);
+    }
+
+  /* Add the last bytes if necessary.  */
+  if (sum > 0)
+    md5_process_bytes (buffer, sum, &ctx);
+
+  /* Construct result in desired memory.  */
+  md5_finish_ctx (&ctx, resblock);
+  return 0;
+}
+
+/* Compute MD5 message digest for LEN bytes beginning at BUFFER.  The
+   result is always in little endian byte order, so that a byte-wise
+   output yields the wanted ASCII representation of the message
+   digest.  The 16 digest bytes are stored at RESBLOCK, which is also
+   the return value.
+
+   RESBLOCK is handed to md5_finish_ctx; its alignment note applies.  */
+void *
+md5_buffer (const char *buffer, size_t len, void *resblock)
+{
+  struct md5_ctx ctx;
+
+  /* Initialize the computation context.  */
+  md5_init_ctx (&ctx);
+
+  /* Feed the whole buffer; a trailing partial block stays pending in CTX.  */
+  md5_process_bytes (buffer, len, &ctx);
+
+  /* Put result in desired memory area.  */
+  return md5_finish_ctx (&ctx, resblock);
+}
+
+
+/* Update CTX with the LEN bytes at BUFFER; LEN need not be a multiple
+   of 64.  Complete 64-byte blocks are digested right away and any
+   remainder is kept in CTX->buffer for the next call or md5_finish_ctx.  */
+void
+md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx)
+{
+  /* When we already have some bits in our internal buffer concatenate
+     both inputs first.  */
+  if (ctx->buflen != 0)
+    {
+      size_t left_over = ctx->buflen;
+      size_t add = 128 - left_over > len ? len : 128 - left_over;
+
+      memcpy (&ctx->buffer[left_over], buffer, add);
+      ctx->buflen += add;
+
+      if (ctx->buflen > 64)
+	{
+	  md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
+
+	  ctx->buflen &= 63;
+	  /* The regions in the following copy operation cannot overlap.  */
+	  memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63],
+		  ctx->buflen);
+	}
+
+      buffer = (const char *) buffer + add;
+      len -= add;
+    }
+
+  /* Process available complete blocks.  */
+  if (len >= 64)
+    {
+#if !_STRING_ARCH_unaligned
+/* To check alignment gcc has an appropriate operator.  Other
+   compilers don't.  */
+# if __GNUC__ >= 2
+#  define UNALIGNED_P(p) (((md5_uintptr) p) % __alignof__ (md5_uint32) != 0)
+# else
+#  define UNALIGNED_P(p) (((md5_uintptr) p) % sizeof (md5_uint32) != 0)
+# endif
+      if (UNALIGNED_P (buffer))
+	while (len > 64)
+	  {
+	    md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
+	    buffer = (const char *) buffer + 64;
+	    len -= 64;
+	  }
+      else
+#endif
+	{
+	  md5_process_block (buffer, len & ~63, ctx);
+	  buffer = (const char *) buffer + (len & ~63);
+	  len &= 63;
+	}
+    }
+
+  /* Move remaining bytes in internal buffer.  */
+  if (len > 0)
+    {
+      size_t left_over = ctx->buflen;
+
+      memcpy (&ctx->buffer[left_over], buffer, len);
+      left_over += len;
+      if (left_over >= 64)
+	{
+	  md5_process_block (ctx->buffer, 64, ctx);
+	  left_over -= 64;
+	  memcpy (ctx->buffer, &ctx->buffer[64], left_over);
+	}
+      ctx->buflen = left_over;
+    }
+}
+
+
+/* These are the four functions used in the four steps of the MD5 algorithm
+   and defined in the RFC 1321.  The first function is a little bit optimized
+   (as found in Colin Plumbs public domain implementation).  */
+/* #define FF(b, c, d) ((b & c) | (~b & d)) */
+#define FF(b, c, d) (d ^ (b & (c ^ d)))
+#define FG(b, c, d) FF (d, b, c)
+#define FH(b, c, d) (b ^ c ^ d)
+#define FI(b, c, d) (c ^ (b | ~d))
+
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 64 == 0.  LEN is also added to the running
+   byte count in CTX->total.  BUFFER is read as an array of 32-bit
+   words, so it must be suitably aligned on systems that require
+   aligned word accesses.  */
+
+void
+md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
+{
+  md5_uint32 correct_words[16];
+  const md5_uint32 *words = buffer;
+  size_t nwords = len / sizeof (md5_uint32);
+  const md5_uint32 *endp = words + nwords;
+  md5_uint32 A = ctx->A;
+  md5_uint32 B = ctx->B;
+  md5_uint32 C = ctx->C;
+  md5_uint32 D = ctx->D;
+
+  /* First increment the byte count.  RFC 1321 specifies the possible
+     length of the file up to 2^64 bits.  Here we only compute the
+     number of bytes.  Do a double word increment.  */
+  ctx->total[0] += len;
+  if (ctx->total[0] < len)
+    ++ctx->total[1];
+
+  /* Process all bytes in the buffer with 64 bytes in each round of
+     the loop.  */
+  while (words < endp)
+    {
+      md5_uint32 *cwp = correct_words;
+      md5_uint32 A_save = A;
+      md5_uint32 B_save = B;
+      md5_uint32 C_save = C;
+      md5_uint32 D_save = D;
+
+      /* First round: using the given function, the context and a constant
+	 the next context is computed.  Because the algorithms processing
+	 unit is a 32-bit word and it is determined to work on words in
+	 little endian byte order we perhaps have to change the byte order
+	 before the computation.  To reduce the work for the next steps
+	 we store the swapped words in the array CORRECT_WORDS.  */
+
+#define OP(a, b, c, d, s, T)						\
+      do								\
+        {								\
+	  a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T;		\
+	  ++words;							\
+	  CYCLIC (a, s);						\
+	  a += b;							\
+        }								\
+      while (0)
+
+      /* It is unfortunate that C does not provide an operator for
+	 cyclic rotation.  Hope the C compiler is smart enough.  */
+#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
+
+      /* Before we start, one word to the strange constants.
+	 They are defined in RFC 1321 as
+
+	 T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
+       */
+
+      /* Round 1.  */
+      OP (A, B, C, D,  7, 0xd76aa478);
+      OP (D, A, B, C, 12, 0xe8c7b756);
+      OP (C, D, A, B, 17, 0x242070db);
+      OP (B, C, D, A, 22, 0xc1bdceee);
+      OP (A, B, C, D,  7, 0xf57c0faf);
+      OP (D, A, B, C, 12, 0x4787c62a);
+      OP (C, D, A, B, 17, 0xa8304613);
+      OP (B, C, D, A, 22, 0xfd469501);
+      OP (A, B, C, D,  7, 0x698098d8);
+      OP (D, A, B, C, 12, 0x8b44f7af);
+      OP (C, D, A, B, 17, 0xffff5bb1);
+      OP (B, C, D, A, 22, 0x895cd7be);
+      OP (A, B, C, D,  7, 0x6b901122);
+      OP (D, A, B, C, 12, 0xfd987193);
+      OP (C, D, A, B, 17, 0xa679438e);
+      OP (B, C, D, A, 22, 0x49b40821);
+
+      /* For the second to fourth round we have the possibly swapped words
+	 in CORRECT_WORDS.  Redefine the macro to take an additional first
+	 argument specifying the function to use.  */
+#undef OP
+#define OP(f, a, b, c, d, k, s, T)					\
+      do 								\
+	{								\
+	  a += f (b, c, d) + correct_words[k] + T;			\
+	  CYCLIC (a, s);						\
+	  a += b;							\
+	}								\
+      while (0)
+
+      /* Round 2.  */
+      OP (FG, A, B, C, D,  1,  5, 0xf61e2562);
+      OP (FG, D, A, B, C,  6,  9, 0xc040b340);
+      OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
+      OP (FG, B, C, D, A,  0, 20, 0xe9b6c7aa);
+      OP (FG, A, B, C, D,  5,  5, 0xd62f105d);
+      OP (FG, D, A, B, C, 10,  9, 0x02441453);
+      OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
+      OP (FG, B, C, D, A,  4, 20, 0xe7d3fbc8);
+      OP (FG, A, B, C, D,  9,  5, 0x21e1cde6);
+      OP (FG, D, A, B, C, 14,  9, 0xc33707d6);
+      OP (FG, C, D, A, B,  3, 14, 0xf4d50d87);
+      OP (FG, B, C, D, A,  8, 20, 0x455a14ed);
+      OP (FG, A, B, C, D, 13,  5, 0xa9e3e905);
+      OP (FG, D, A, B, C,  2,  9, 0xfcefa3f8);
+      OP (FG, C, D, A, B,  7, 14, 0x676f02d9);
+      OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
+
+      /* Round 3.  */
+      OP (FH, A, B, C, D,  5,  4, 0xfffa3942);
+      OP (FH, D, A, B, C,  8, 11, 0x8771f681);
+      OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
+      OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
+      OP (FH, A, B, C, D,  1,  4, 0xa4beea44);
+      OP (FH, D, A, B, C,  4, 11, 0x4bdecfa9);
+      OP (FH, C, D, A, B,  7, 16, 0xf6bb4b60);
+      OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
+      OP (FH, A, B, C, D, 13,  4, 0x289b7ec6);
+      OP (FH, D, A, B, C,  0, 11, 0xeaa127fa);
+      OP (FH, C, D, A, B,  3, 16, 0xd4ef3085);
+      OP (FH, B, C, D, A,  6, 23, 0x04881d05);
+      OP (FH, A, B, C, D,  9,  4, 0xd9d4d039);
+      OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
+      OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
+      OP (FH, B, C, D, A,  2, 23, 0xc4ac5665);
+
+      /* Round 4.  */
+      OP (FI, A, B, C, D,  0,  6, 0xf4292244);
+      OP (FI, D, A, B, C,  7, 10, 0x432aff97);
+      OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
+      OP (FI, B, C, D, A,  5, 21, 0xfc93a039);
+      OP (FI, A, B, C, D, 12,  6, 0x655b59c3);
+      OP (FI, D, A, B, C,  3, 10, 0x8f0ccc92);
+      OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
+      OP (FI, B, C, D, A,  1, 21, 0x85845dd1);
+      OP (FI, A, B, C, D,  8,  6, 0x6fa87e4f);
+      OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
+      OP (FI, C, D, A, B,  6, 15, 0xa3014314);
+      OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
+      OP (FI, A, B, C, D,  4,  6, 0xf7537e82);
+      OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
+      OP (FI, C, D, A, B,  2, 15, 0x2ad7d2bb);
+      OP (FI, B, C, D, A,  9, 21, 0xeb86d391);
+
+      /* Add the starting values of the context.  */
+      A += A_save;
+      B += B_save;
+      C += C_save;
+      D += D_save;
+    }
+
+  /* Put checksum in context given as argument.  */
+  ctx->A = A;
+  ctx->B = B;
+  ctx->C = C;
+  ctx->D = D;
+}
diff --git a/src/md5.h b/src/md5.h
new file mode 100644
index 000000000..6301e4558
--- /dev/null
+++ b/src/md5.h
@@ -0,0 +1,151 @@
+/* Declaration of functions and data types used for MD5 sum computing
+   library functions.
+   Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _MD5_H
+#define _MD5_H 1
+
+#include <stdio.h>
+
+#if defined HAVE_LIMITS_H || _LIBC
+# include <limits.h>
+#endif
+
+/* The following contortions are an attempt to use the C preprocessor
+   to determine an unsigned integral type that is 32 bits wide.  An
+   alternative approach is to use autoconf's AC_CHECK_SIZEOF macro, but
+   doing that would require that the configure script compile and *run*
+   the resulting executable.  Locally running cross-compiled executables
+   is usually not possible.  */
+
+#ifdef _LIBC
+# include <stdint.h>
+typedef uint32_t md5_uint32;
+typedef uintptr_t md5_uintptr;
+#else
+# if defined __STDC__ && __STDC__
+#  define UINT_MAX_32_BITS 4294967295U
+# else
+#  define UINT_MAX_32_BITS 0xFFFFFFFF
+# endif
+
+/* If UINT_MAX isn't defined, assume it's a 32-bit type.
+   This should be valid for all systems GNU cares about because
+   that doesn't include 16-bit systems, and only modern systems
+   (that certainly have <limits.h>) have 64+-bit integral types.  */
+
+# ifndef UINT_MAX
+#  define UINT_MAX UINT_MAX_32_BITS
+# endif
+
+# if UINT_MAX == UINT_MAX_32_BITS
+   typedef unsigned int md5_uint32;
+# else
+#  if USHRT_MAX == UINT_MAX_32_BITS
+    typedef unsigned short md5_uint32;
+#  else
+#   if ULONG_MAX == UINT_MAX_32_BITS
+     typedef unsigned long md5_uint32;
+#   else
+     /* The following line is intended to evoke an error.
+        Using #error is not portable enough.  */
+     "Cannot determine unsigned 32-bit data type."
+#   endif
+#  endif
+# endif
+/* We have to make a guess about the integer type equivalent in size
+   to pointers which should always be correct.  */
+typedef unsigned long int md5_uintptr;
+#endif
+
+#undef __P
+#if defined (__STDC__) && __STDC__
+# define __P(x) x
+#else
+# define __P(x) ()
+#endif
+
+/* Structure to save state of computation between the single steps.  */
+struct md5_ctx
+{
+  md5_uint32 A; /* A..D: the four chaining words, seeded by md5_init_ctx */
+  md5_uint32 B;
+  md5_uint32 C;
+  md5_uint32 D;
+
+  md5_uint32 total[2]; /* byte count: [0] low word, [1] carries out of [0] */
+  md5_uint32 buflen; /* number of bytes currently pending in buffer */
+  char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
+};
+
+/*
+ * The following three functions make up the low-level interface used by
+ * the functions `md5_stream' and `md5_buffer'.
+ */
+
+/* Initialize structure containing state of computation.
+   (RFC 1321, 3.3: Step 3)  */
+extern void md5_init_ctx __P ((struct md5_ctx *ctx));
+
+/* Starting with the result of former calls of this function (or the
+   initialization function) update the context for the next LEN bytes
+   starting at BUFFER.
+   It is necessary that LEN is a multiple of 64!!! */
+extern void md5_process_block __P ((const void *buffer, size_t len,
+				      struct md5_ctx *ctx));
+
+/* Starting with the result of former calls of this function (or the
+   initialization function) update the context for the next LEN bytes
+   starting at BUFFER.
+   It is NOT required that LEN is a multiple of 64.  */
+extern void md5_process_bytes __P ((const void *buffer, size_t len,
+				      struct md5_ctx *ctx));
+
+/* Process the remaining bytes in the buffer and put result from CTX
+   in first 16 bytes following RESBUF.  The result is always in little
+   endian byte order, so that a byte-wise output yields to the wanted
+   ASCII representation of the message digest.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+extern void *md5_finish_ctx __P ((struct md5_ctx *ctx, void *resbuf));
+
+
+/* Put result from CTX in first 16 bytes following RESBUF.  The result is
+   always in little endian byte order, so that a byte-wise output yields
+   to the wanted ASCII representation of the message digest.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+extern void *md5_read_ctx __P ((const struct md5_ctx *ctx, void *resbuf));
+
+
+/* Compute MD5 message digest for bytes read from STREAM.  The
+   resulting message digest number will be written into the 16 bytes
+   beginning at RESBLOCK.  */
+extern int md5_stream __P ((FILE *stream, void *resblock));
+
+/* Compute MD5 message digest for LEN bytes beginning at BUFFER.  The
+   result is always in little endian byte order, so that a byte-wise
+   output yields to the wanted ASCII representation of the message
+   digest.  */
+extern void *md5_buffer __P ((const char *buffer, size_t len,
+				void *resblock));
+    
+#endif /* md5.h */
diff --git a/src/nix-addroot b/src/nix-addroot
new file mode 100755
index 000000000..3ab9e8a25
--- /dev/null
+++ b/src/nix-addroot
@@ -0,0 +1,18 @@
+#! /bin/sh
+# Usage: nix-addroot HASH...   Record each HASH as a garbage-collector root.
+ROOTLIST=~/.nixroots
+
+if ! test -f "$ROOTLIST"; then
+    touch "$ROOTLIST"
+fi
+# "$@" keeps every argument intact; a hash is added only if `nix ensure' succeeds.
+for i in "$@"; do
+    if nix ensure "$i" > /dev/null; then
+        if grep -qxF -e "$i" "$ROOTLIST"; then
+            echo "$i already is a root"
+        else
+            echo "adding root $i"
+            echo "$i" >> "$ROOTLIST"
+        fi
+    fi
+done
diff --git a/src/nix.cc b/src/nix.cc
index d53a809b7..4ff49eabd 100644
--- a/src/nix.cc
+++ b/src/nix.cc
@@ -4,6 +4,8 @@
 #include <string>
 #include <sstream>
 #include <list>
+#include <vector>
+#include <set>
 #include <map>
 #include <cstdio>
 
@@ -14,6 +16,10 @@
 
 #include <db4/db_cxx.h>
 
+extern "C" {
+#include "md5.h"
+}
+
 using namespace std;
 
 
@@ -146,8 +152,20 @@ void enumDB(const string & dbname, DBPairs & contents)
 }
 
 
+string printHash(unsigned char * buf)
+{
+    /* Render the 16 digest bytes in `buf' as 32 lowercase hex digits,
+       two per byte, most significant nibble first. */
+    static const char digits[] = "0123456789abcdef";
+    string text;
+    for (int i = 0; i < 16; i++)
+        text.append(1, digits[buf[i] >> 4]).append(1, digits[buf[i] & 0xf]);
+    return text;
+}
+
+    
 /* Verify that a reference is valid (that is, is a MD5 hash code). */
-void checkRef(const string & s)
+void checkHash(const string & s)
 {
     string err = "invalid reference: " + s;
     if (s.length() != 32)
@@ -162,31 +180,36 @@ void checkRef(const string & s)
 
 
 /* Compute the MD5 hash of a file. */
-string makeRef(string filename)
+string hashFile(string filename)
 {
-    char hash[33];
-
-    FILE * pipe = popen(("md5sum " + filename + " 2> /dev/null").c_str(), "r");
-    if (!pipe) throw BadRefError("cannot execute md5sum");
-
-    if (fread(hash, 32, 1, pipe) != 1)
-        throw BadRefError("cannot read hash from md5sum of " + filename);
-    hash[32] = 0;
-
-    pclose(pipe);
-
-    checkRef(hash);
-    return hash;
+    unsigned char hash[16]; /* raw digest filled in by md5_stream() */
+    FILE * file = fopen(filename.c_str(), "rb");
+    if (!file) /* NOTE(review): any fopen failure is reported as "does not exist" */
+        throw BadRefError("file `" + filename + "' does not exist");
+    int err = md5_stream(file, hash); /* nonzero means a read error */
+    fclose(file); /* closed before the error check, so neither outcome leaks it */
+    if (err) throw BadRefError("cannot hash file");
+    return printHash(hash); /* 32 hex digits */
 }
 
 
-typedef pair<string, string> Param;
-typedef list<Param> Params;
+typedef map<string, string> Params;
 
 
-void readPkgDescr(const string & pkgfile,
+void readPkgDescr(const string & hash,
     Params & pkgImports, Params & fileImports, Params & arguments)
 {
+    string pkgfile;
+
+    if (!queryDB(dbRefs, hash, pkgfile))
+        throw Error("unknown package " + hash);
+
+    //    cerr << "reading information about " + hash + " from " + pkgfile + "\n";
+
+    /* Verify that the file hasn't changed. !!! race */
+    if (hashFile(pkgfile) != hash)
+        throw Error("file " + pkgfile + " is stale");
+
     ifstream file;
     file.exceptions(ios::badbit);
     file.open(pkgfile.c_str());
@@ -206,13 +229,13 @@ void readPkgDescr(const string & pkgfile,
         str >> name >> op >> ref;
 
         if (op == "<-") {
-            checkRef(ref);
-            pkgImports.push_back(Param(name, ref));
+            checkHash(ref);
+            pkgImports[name] = ref;
         } else if (op == "=") {
-            checkRef(ref);
-            fileImports.push_back(Param(name, ref));
+            checkHash(ref);
+            fileImports[name] = ref;
         } else if (op == ":")
-            arguments.push_back(Param(name, ref));
+            arguments[name] = ref;
         else throw Error("invalid operator " + op);
     }
 }
@@ -226,20 +249,9 @@ typedef map<string, string> Environment;
 
 void fetchDeps(string hash, Environment & env)
 {
-    string pkgfile;
-
-    if (!queryDB(dbRefs, hash, pkgfile))
-        throw Error("unknown package " + hash);
-
-    cerr << "reading information about " + hash + " from " + pkgfile + "\n";
-
-    /* Verify that the file hasn't changed. !!! race */
-    if (makeRef(pkgfile) != hash)
-        throw Error("file " + pkgfile + " is stale");
-
     /* Read the package description file. */
     Params pkgImports, fileImports, arguments;
-    readPkgDescr(pkgfile, pkgImports, fileImports, arguments);
+    readPkgDescr(hash, pkgImports, fileImports, arguments);
 
     /* Recursively fetch all the dependencies, filling in the
        environment as we go along. */
@@ -264,7 +276,7 @@ void fetchDeps(string hash, Environment & env)
         if (!queryDB(dbRefs, it->second, file))
             throw Error("unknown file " + it->second);
 
-        if (makeRef(file) != it->second)
+        if (hashFile(file) != it->second)
             throw Error("file " + file + " is stale");
 
         env[it->first] = file;
@@ -374,7 +386,7 @@ void installPkg(string hash)
 string getPkg(string hash)
 {
     string path;
-    checkRef(hash);
+    checkHash(hash);
     while (!queryDB(dbInstPkgs, hash, path))
         installPkg(hash);
     return path;
@@ -434,6 +446,20 @@ void runPkg(string hash)
 }
 
 
+void ensurePkg(string hash)
+{
+    /* Build descriptors are installed; run descriptors only get their
+       imports fetched.  Anything else is rejected. */
+    Params pkgs, files, args;
+    readPkgDescr(hash, pkgs, files, args);
+    bool isBuild = files.count("build") > 0;
+    if (!isBuild && files.count("run") == 0)
+        throw Error("invalid descriptor");
+    Environment env;
+    if (isBuild) getPkg(hash); else fetchDeps(hash, env);
+}
+
+
 string absPath(string filename)
 {
     if (filename[0] != '/') {
@@ -450,14 +476,14 @@ string absPath(string filename)
 void registerFile(string filename)
 {
     filename = absPath(filename);
-    setDB(dbRefs, makeRef(filename), filename);
+    setDB(dbRefs, hashFile(filename), filename);
 }
 
 
 /* This is primarily used for bootstrapping. */
 void registerInstalledPkg(string hash, string path)
 {
-    checkRef(hash);
+    checkHash(hash);
     if (path == "")
         delDB(dbInstPkgs, hash);
     else
@@ -483,8 +509,10 @@ void verifyDB()
          it != fileRefs.end(); it++)
     {
         try {
-            if (makeRef(it->second) != it->first)
+            if (hashFile(it->second) != it->first) {
+                cerr << "file " << it->second << " has changed\n";
                 delDB(dbRefs, it->first);
+            }
         } catch (BadRefError e) { /* !!! better error check */ 
             cerr << "file " << it->second << " has disappeared\n";
             delDB(dbRefs, it->first);
@@ -519,48 +547,136 @@ void listInstalledPkgs()
 
     for (DBPairs::iterator it = instPkgs.begin();
          it != instPkgs.end(); it++)
+        cout << it->first << endl;
+}
+
+
+void printInfo(vector<string> hashes) /* print "HASH ID" for each descriptor hash */
+{
+    for (vector<string>::iterator it = hashes.begin();
+         it != hashes.end(); it++)
     {
-        string descr;
-        if (!queryDB(dbRefs, it->first, descr))
-            descr = "descriptor missing";
-        cout << it->first << " " << descr << endl;
+        try {
+            Params pkgImports, fileImports, arguments;
+            readPkgDescr(*it, pkgImports, fileImports, arguments);
+            cout << *it << " " << getFromEnv(arguments, "id") << endl;
+        } catch (Error & e) { /* any Error from the try block is reported as a missing descriptor */
+            cout << *it << " (descriptor missing)\n";
+        }
     }
 }
 
 
-void run(int argc, char * * argv)
+void computeClosure(const vector<string> & rootHashes,
+    set<string> & result)
+{
+    /* Breadth-first walk over the package-import relation.  The
+       visited set is the answer; it is copied into `result' only
+       after the walk completes. */
+    set<string> visited;
+    list<string> pending(rootHashes.begin(), rootHashes.end());
+
+    while (!pending.empty()) {
+        string hash = pending.front();
+        pending.pop_front();
+
+        /* insert() tells us whether the hash is new; skip repeats. */
+        if (!visited.insert(hash).second) continue;
+
+        Params pkgImports, fileImports, arguments;
+        readPkgDescr(hash, pkgImports, fileImports, arguments);
+        for (Params::iterator imp = pkgImports.begin();
+             imp != pkgImports.end(); ++imp)
+            pending.push_back(imp->second);
+    }
+    result = visited;
+}
+
+
+void printClosure(const vector<string> & rootHashes)
+{
+    /* One hash per line, in the set's sorted order. */
+    set<string> closure;
+    computeClosure(rootHashes, closure);
+    set<string>::const_iterator cur = closure.begin();
+    while (cur != closure.end()) cout << *cur++ << endl;
+}
+
+
+string dotQuote(const string & s)
+{
+    return string("\"").append(s).append("\""); /* wrap in double quotes for dot */
+}
+
+
+void printGraph(vector<string> rootHashes)
+{
+    /* Emit one labelled node per descriptor in the closure, plus an
+       edge from every imported package to the descriptor importing
+       it. */
+    set<string> closure;
+    computeClosure(rootHashes, closure);
+
+    cout << "digraph G {\n";
+    set<string>::iterator node = closure.begin();
+    while (node != closure.end()) {
+        const string target = dotQuote(*node);
+        Params pkgImports, fileImports, arguments;
+        readPkgDescr(*node, pkgImports, fileImports, arguments);
+
+        cout << target << "[label = \""
+             << getFromEnv(arguments, "id") << "\"];\n";
+
+        for (Params::iterator dep = pkgImports.begin();
+             dep != pkgImports.end(); ++dep)
+            cout << dotQuote(dep->second) << " -> " << target << ";\n";
+        ++node;
+    }
+    cout << "}\n";
+}
+
+
+void run(vector<string> args) /* args[0] is the subcommand, the rest are its operands */
 {
     UsageError argcError("wrong number of arguments");
     string cmd;
 
-    if (argc < 1)
-        throw UsageError("no command specified");
-
-    cmd = argv[0];
-    argc--, argv++;
+    if (args.size() < 1) throw UsageError("no command specified");
+    
+    cmd = args[0];
+    args.erase(args.begin()); // drop the command name so args holds only operands; O(n)
 
     if (cmd == "init") {
-        if (argc != 0) throw argcError;
+        if (args.size() != 0) throw argcError;
         initDB();
     } else if (cmd == "verify") {
-        if (argc != 0) throw argcError;
+        if (args.size() != 0) throw argcError;
         verifyDB();
     } else if (cmd == "getpkg") {
-        if (argc != 1) throw argcError;
-        string path = getPkg(argv[0]);
+        if (args.size() != 1) throw argcError;
+        string path = getPkg(args[0]);
         cout << path << endl;
     } else if (cmd == "run") {
-        if (argc != 1) throw argcError;
-        runPkg(argv[0]);
+        if (args.size() != 1) throw argcError;
+        runPkg(args[0]);
+    } else if (cmd == "ensure") {
+        if (args.size() != 1) throw argcError;
+        ensurePkg(args[0]);
     } else if (cmd == "regfile") {
-        if (argc != 1) throw argcError;
-        registerFile(argv[0]);
+        if (args.size() != 1) throw argcError;
+        registerFile(args[0]);
     } else if (cmd == "reginst") {
-        if (argc != 2) throw argcError;
-        registerInstalledPkg(argv[0], argv[1]);
+        if (args.size() != 2) throw argcError;
+        registerInstalledPkg(args[0], args[1]);
     } else if (cmd == "listinst") {
-        if (argc != 0) throw argcError;
+        if (args.size() != 0) throw argcError;
         listInstalledPkgs();
+    } else if (cmd == "info") { /* info, closure and graph take any number of hashes */
+        printInfo(args);
+    } else if (cmd == "closure") {
+        printClosure(args);
+    } else if (cmd == "graph") {
+        printGraph(args);
     } else
         throw UsageError("unknown command: " + string(cmd));
 }
@@ -594,6 +710,20 @@ Subcommands:
 
   run HASH
     Run the descriptor referenced by HASH.
+
+  ensure HASH
+    Like getpkg, but if HASH refers to a run descriptor, fetch only
+    the dependencies.
+
+  info HASH...
+    Print information about the specified descriptors.
+
+  closure HASH...
+    Determine the closure of the set of descriptors under the import
+    relation, starting at the given roots.
+
+  graph HASH...
+    Like closure, but print a dot graph specification.
 ";
 }
 
@@ -630,11 +760,13 @@ void main2(int argc, char * * argv)
         }
     }
 
-    argc -= optind, argv += optind;
-    run(argc, argv);
+    vector<string> args;
+    argc--, argv++;
+    while (argc--) args.push_back(*argv++);
+    run(args);
 }
 
-    
+
 int main(int argc, char * * argv)
 {
     prog = argv[0];