From 73c53935d00660301e9408beabf1c80d6ef48610 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 24 Mar 2003 17:49:56 +0000 Subject: [PATCH] * For efficiency: md5 integrated into nix. * Command `nix ensure' which is like `nix getpkg' except that if the hash refers to a run action it will just ensure that the imports are there. * Command `nix closure' to print out the closure of the set of descriptors under the import relation, starting at a set of roots. This can be used for garbage collection (e.g., given a list of `activated' packages, we can delete all packages not reachable from those). * Command `nix graph' to print out a Dot graph of the dependency graph. * `nix-addroot' adds a root for the (unimplemented) garbage collector. --- src/Makefile | 12 +- src/md5.c | 435 ++++++++++++++++++++++++++++++++++++++++++++++++ src/md5.h | 151 +++++++++++++++++ src/nix-addroot | 18 ++ src/nix.cc | 260 ++++++++++++++++++++++------- 5 files changed, 810 insertions(+), 66 deletions(-) create mode 100644 src/md5.c create mode 100644 src/md5.h create mode 100755 src/nix-addroot diff --git a/src/Makefile b/src/Makefile index 6b4c792bb..237257275 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,8 +2,16 @@ all: nix nix-instantiate SYSTEM = $(shell ./config.guess) -nix: nix.cc - g++ -g -Wall -o nix nix.cc -ldb_cxx-4 -DSYSTEM=\"$(SYSTEM)\" +nix: nix.o md5.o + g++ -g -o $@ $^ -ldb_cxx-4 + +%.o: %.cc + g++ -g -Wall -o $@ -c $< -DSYSTEM=\"$(SYSTEM)\" + +%.o: %.c + gcc -g -Wall -o $@ -c $< -DSYSTEM=\"$(SYSTEM)\" + +md5.o: md5.c md5.h nix-instantiate: nix-instantiate.in sed "s/@SYSTEM@/$(SYSTEM)/" < $^ > $@ diff --git a/src/md5.c b/src/md5.c new file mode 100644 index 000000000..64ade3c6f --- /dev/null +++ b/src/md5.c @@ -0,0 +1,435 @@ +/* Functions to compute MD5 message digest of files or memory blocks. + according to the definition of MD5 in RFC 1321 from April 1992. + Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> + +#include "md5.h" + +#ifdef WORDS_BIGENDIAN +# define SWAP(n) \ + (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) +#else +# define SWAP(n) (n) +#endif + + +/* This array contains the bytes used to pad the buffer to the next + 64-byte boundary. (RFC 1321, 3.1: Step 1) */ +static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; + + +/* Initialize structure containing state of computation. + (RFC 1321, 3.3: Step 3) */ +void +md5_init_ctx (ctx) + struct md5_ctx *ctx; +{ + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + + ctx->total[0] = ctx->total[1] = 0; + ctx->buflen = 0; +} + +/* Put result from CTX in first 16 bytes following RESBUF. The result + must be in little endian byte order. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value.
*/ +void * +md5_read_ctx (ctx, resbuf) + const struct md5_ctx *ctx; + void *resbuf; +{ + ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A); + ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B); + ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C); + ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D); + + return resbuf; +} + +/* Process the remaining bytes in the internal buffer and the usual + prolog according to the standard and write the result to RESBUF. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +void * +md5_finish_ctx (ctx, resbuf) + struct md5_ctx *ctx; + void *resbuf; +{ + /* Take yet unprocessed bytes into account. */ + md5_uint32 bytes = ctx->buflen; + size_t pad; + + /* Now count remaining bytes. */ + ctx->total[0] += bytes; + if (ctx->total[0] < bytes) + ++ctx->total[1]; + + pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; + memcpy (&ctx->buffer[bytes], fillbuf, pad); + + /* Put the 64-bit file length in *bits* at the end of the buffer. */ + *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP (ctx->total[0] << 3); + *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP ((ctx->total[1] << 3) | + (ctx->total[0] >> 29)); + + /* Process last bytes. */ + md5_process_block (ctx->buffer, bytes + pad + 8, ctx); + + return md5_read_ctx (ctx, resbuf); +} + +/* Compute MD5 message digest for bytes read from STREAM. The + resulting message digest number will be written into the 16 bytes + beginning at RESBLOCK. */ +int +md5_stream (stream, resblock) + FILE *stream; + void *resblock; +{ + /* Important: BLOCKSIZE must be a multiple of 64. */ +#define BLOCKSIZE 4096 + struct md5_ctx ctx; + char buffer[BLOCKSIZE + 72]; + size_t sum; + + /* Initialize the computation context. */ + md5_init_ctx (&ctx); + + /* Iterate over full file contents. */ + while (1) + { + /* We read the file in blocks of BLOCKSIZE bytes. One call of the + computation function processes the whole buffer so that with the + next round of the loop another block can be read. 
*/ + size_t n; + sum = 0; + + /* Read block. Take care for partial reads. */ + do + { + n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream); + + sum += n; + } + while (sum < BLOCKSIZE && n != 0); + if (n == 0 && ferror (stream)) + return 1; + + /* If end of file is reached, end the loop. */ + if (n == 0) + break; + + /* Process buffer with BLOCKSIZE bytes. Note that + BLOCKSIZE % 64 == 0 + */ + md5_process_block (buffer, BLOCKSIZE, &ctx); + } + + /* Add the last bytes if necessary. */ + if (sum > 0) + md5_process_bytes (buffer, sum, &ctx); + + /* Construct result in desired memory. */ + md5_finish_ctx (&ctx, resblock); + return 0; +} + +/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The + result is always in little endian byte order, so that a byte-wise + output yields to the wanted ASCII representation of the message + digest. */ +void * +md5_buffer (buffer, len, resblock) + const char *buffer; + size_t len; + void *resblock; +{ + struct md5_ctx ctx; + + /* Initialize the computation context. */ + md5_init_ctx (&ctx); + + /* Process whole buffer but last len % 64 bytes. */ + md5_process_bytes (buffer, len, &ctx); + + /* Put result in desired memory area. */ + return md5_finish_ctx (&ctx, resblock); +} + + +void +md5_process_bytes (buffer, len, ctx) + const void *buffer; + size_t len; + struct md5_ctx *ctx; +{ + /* When we already have some bits in our internal buffer concatenate + both inputs first. */ + if (ctx->buflen != 0) + { + size_t left_over = ctx->buflen; + size_t add = 128 - left_over > len ? len : 128 - left_over; + + memcpy (&ctx->buffer[left_over], buffer, add); + ctx->buflen += add; + + if (ctx->buflen > 64) + { + md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); + + ctx->buflen &= 63; + /* The regions in the following copy operation cannot overlap. 
*/ + memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63], + ctx->buflen); + } + + buffer = (const char *) buffer + add; + len -= add; + } + + /* Process available complete blocks. */ + if (len >= 64) + { +#if !_STRING_ARCH_unaligned +/* To check alignment gcc has an appropriate operator. Other + compilers don't. */ +# if __GNUC__ >= 2 +# define UNALIGNED_P(p) (((md5_uintptr) p) % __alignof__ (md5_uint32) != 0) +# else +# define UNALIGNED_P(p) (((md5_uintptr) p) % sizeof (md5_uint32) != 0) +# endif + if (UNALIGNED_P (buffer)) + while (len > 64) + { + md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); + buffer = (const char *) buffer + 64; + len -= 64; + } + else +#endif + { + md5_process_block (buffer, len & ~63, ctx); + buffer = (const char *) buffer + (len & ~63); + len &= 63; + } + } + + /* Move remaining bytes in internal buffer. */ + if (len > 0) + { + size_t left_over = ctx->buflen; + + memcpy (&ctx->buffer[left_over], buffer, len); + left_over += len; + if (left_over >= 64) + { + md5_process_block (ctx->buffer, 64, ctx); + left_over -= 64; + memcpy (ctx->buffer, &ctx->buffer[64], left_over); + } + ctx->buflen = left_over; + } +} + + +/* These are the four functions used in the four steps of the MD5 algorithm + and defined in the RFC 1321. The first function is a little bit optimized + (as found in Colin Plumbs public domain implementation). */ +/* #define FF(b, c, d) ((b & c) | (~b & d)) */ +#define FF(b, c, d) (d ^ (b & (c ^ d))) +#define FG(b, c, d) FF (d, b, c) +#define FH(b, c, d) (b ^ c ^ d) +#define FI(b, c, d) (c ^ (b | ~d)) + +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 64 == 0. 
*/ + +void +md5_process_block (buffer, len, ctx) + const void *buffer; + size_t len; + struct md5_ctx *ctx; +{ + md5_uint32 correct_words[16]; + const md5_uint32 *words = buffer; + size_t nwords = len / sizeof (md5_uint32); + const md5_uint32 *endp = words + nwords; + md5_uint32 A = ctx->A; + md5_uint32 B = ctx->B; + md5_uint32 C = ctx->C; + md5_uint32 D = ctx->D; + + /* First increment the byte count. RFC 1321 specifies the possible + length of the file up to 2^64 bits. Here we only compute the + number of bytes. Do a double word increment. */ + ctx->total[0] += len; + if (ctx->total[0] < len) + ++ctx->total[1]; + + /* Process all bytes in the buffer with 64 bytes in each round of + the loop. */ + while (words < endp) + { + md5_uint32 *cwp = correct_words; + md5_uint32 A_save = A; + md5_uint32 B_save = B; + md5_uint32 C_save = C; + md5_uint32 D_save = D; + + /* First round: using the given function, the context and a constant + the next context is computed. Because the algorithms processing + unit is a 32-bit word and it is determined to work on words in + little endian byte order we perhaps have to change the byte order + before the computation. To reduce the work for the next steps + we store the swapped words in the array CORRECT_WORDS. */ + +#define OP(a, b, c, d, s, T) \ + do \ + { \ + a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \ + ++words; \ + CYCLIC (a, s); \ + a += b; \ + } \ + while (0) + + /* It is unfortunate that C does not provide an operator for + cyclic rotation. Hope the C compiler is smart enough. */ +#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) + + /* Before we start, one word to the strange constants. + They are defined in RFC 1321 as + + T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 + */ + + /* Round 1. 
*/ + OP (A, B, C, D, 7, 0xd76aa478); + OP (D, A, B, C, 12, 0xe8c7b756); + OP (C, D, A, B, 17, 0x242070db); + OP (B, C, D, A, 22, 0xc1bdceee); + OP (A, B, C, D, 7, 0xf57c0faf); + OP (D, A, B, C, 12, 0x4787c62a); + OP (C, D, A, B, 17, 0xa8304613); + OP (B, C, D, A, 22, 0xfd469501); + OP (A, B, C, D, 7, 0x698098d8); + OP (D, A, B, C, 12, 0x8b44f7af); + OP (C, D, A, B, 17, 0xffff5bb1); + OP (B, C, D, A, 22, 0x895cd7be); + OP (A, B, C, D, 7, 0x6b901122); + OP (D, A, B, C, 12, 0xfd987193); + OP (C, D, A, B, 17, 0xa679438e); + OP (B, C, D, A, 22, 0x49b40821); + + /* For the second to fourth round we have the possibly swapped words + in CORRECT_WORDS. Redefine the macro to take an additional first + argument specifying the function to use. */ +#undef OP +#define OP(f, a, b, c, d, k, s, T) \ + do \ + { \ + a += f (b, c, d) + correct_words[k] + T; \ + CYCLIC (a, s); \ + a += b; \ + } \ + while (0) + + /* Round 2. */ + OP (FG, A, B, C, D, 1, 5, 0xf61e2562); + OP (FG, D, A, B, C, 6, 9, 0xc040b340); + OP (FG, C, D, A, B, 11, 14, 0x265e5a51); + OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); + OP (FG, A, B, C, D, 5, 5, 0xd62f105d); + OP (FG, D, A, B, C, 10, 9, 0x02441453); + OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); + OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); + OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); + OP (FG, D, A, B, C, 14, 9, 0xc33707d6); + OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); + OP (FG, B, C, D, A, 8, 20, 0x455a14ed); + OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); + OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); + OP (FG, C, D, A, B, 7, 14, 0x676f02d9); + OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); + + /* Round 3. 
*/ + OP (FH, A, B, C, D, 5, 4, 0xfffa3942); + OP (FH, D, A, B, C, 8, 11, 0x8771f681); + OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); + OP (FH, B, C, D, A, 14, 23, 0xfde5380c); + OP (FH, A, B, C, D, 1, 4, 0xa4beea44); + OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); + OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); + OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); + OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); + OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); + OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); + OP (FH, B, C, D, A, 6, 23, 0x04881d05); + OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); + OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); + OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); + OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); + + /* Round 4. */ + OP (FI, A, B, C, D, 0, 6, 0xf4292244); + OP (FI, D, A, B, C, 7, 10, 0x432aff97); + OP (FI, C, D, A, B, 14, 15, 0xab9423a7); + OP (FI, B, C, D, A, 5, 21, 0xfc93a039); + OP (FI, A, B, C, D, 12, 6, 0x655b59c3); + OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); + OP (FI, C, D, A, B, 10, 15, 0xffeff47d); + OP (FI, B, C, D, A, 1, 21, 0x85845dd1); + OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); + OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); + OP (FI, C, D, A, B, 6, 15, 0xa3014314); + OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); + OP (FI, A, B, C, D, 4, 6, 0xf7537e82); + OP (FI, D, A, B, C, 11, 10, 0xbd3af235); + OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); + OP (FI, B, C, D, A, 9, 21, 0xeb86d391); + + /* Add the starting values of the context. */ + A += A_save; + B += B_save; + C += C_save; + D += D_save; + } + + /* Put checksum in context given as argument. */ + ctx->A = A; + ctx->B = B; + ctx->C = C; + ctx->D = D; +} diff --git a/src/md5.h b/src/md5.h new file mode 100644 index 000000000..6301e4558 --- /dev/null +++ b/src/md5.h @@ -0,0 +1,151 @@ +/* Declaration of functions and data types used for MD5 sum computing + library functions. + Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _MD5_H +#define _MD5_H 1 + +#include <stdio.h> + +#if defined HAVE_LIMITS_H || _LIBC +# include <limits.h> +#endif + +/* The following contortions are an attempt to use the C preprocessor + to determine an unsigned integral type that is 32 bits wide. An + alternative approach is to use autoconf's AC_CHECK_SIZEOF macro, but + doing that would require that the configure script compile and *run* + the resulting executable. Locally running cross-compiled executables + is usually not possible. */ + +#ifdef _LIBC +# include <stdint.h> +typedef uint32_t md5_uint32; +typedef uintptr_t md5_uintptr; +#else +# if defined __STDC__ && __STDC__ +# define UINT_MAX_32_BITS 4294967295U +# else +# define UINT_MAX_32_BITS 0xFFFFFFFF +# endif + +/* If UINT_MAX isn't defined, assume it's a 32-bit type. + This should be valid for all systems GNU cares about because + that doesn't include 16-bit systems, and only modern systems + (that certainly have <limits.h>) have 64+-bit integral types.
*/ + +# ifndef UINT_MAX +# define UINT_MAX UINT_MAX_32_BITS +# endif + +# if UINT_MAX == UINT_MAX_32_BITS + typedef unsigned int md5_uint32; +# else +# if USHRT_MAX == UINT_MAX_32_BITS + typedef unsigned short md5_uint32; +# else +# if ULONG_MAX == UINT_MAX_32_BITS + typedef unsigned long md5_uint32; +# else + /* The following line is intended to evoke an error. + Using #error is not portable enough. */ + "Cannot determine unsigned 32-bit data type." +# endif +# endif +# endif +/* We have to make a guess about the integer type equivalent in size + to pointers which should always be correct. */ +typedef unsigned long int md5_uintptr; +#endif + +#undef __P +#if defined (__STDC__) && __STDC__ +# define __P(x) x +#else +# define __P(x) () +#endif + +/* Structure to save state of computation between the single steps. */ +struct md5_ctx +{ + md5_uint32 A; + md5_uint32 B; + md5_uint32 C; + md5_uint32 D; + + md5_uint32 total[2]; + md5_uint32 buflen; + char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32)))); +}; + +/* + * The following three functions are build up the low level used in + * the functions `md5_stream' and `md5_buffer'. + */ + +/* Initialize structure containing state of computation. + (RFC 1321, 3.3: Step 3) */ +extern void md5_init_ctx __P ((struct md5_ctx *ctx)); + +/* Starting with the result of former calls of this function (or the + initialization function update the context for the next LEN bytes + starting at BUFFER. + It is necessary that LEN is a multiple of 64!!! */ +extern void md5_process_block __P ((const void *buffer, size_t len, + struct md5_ctx *ctx)); + +/* Starting with the result of former calls of this function (or the + initialization function update the context for the next LEN bytes + starting at BUFFER. + It is NOT required that LEN is a multiple of 64. 
*/ +extern void md5_process_bytes __P ((const void *buffer, size_t len, + struct md5_ctx *ctx)); + +/* Process the remaining bytes in the buffer and put result from CTX + in first 16 bytes following RESBUF. The result is always in little + endian byte order, so that a byte-wise output yields to the wanted + ASCII representation of the message digest. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +extern void *md5_finish_ctx __P ((struct md5_ctx *ctx, void *resbuf)); + + +/* Put result from CTX in first 16 bytes following RESBUF. The result is + always in little endian byte order, so that a byte-wise output yields + to the wanted ASCII representation of the message digest. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +extern void *md5_read_ctx __P ((const struct md5_ctx *ctx, void *resbuf)); + + +/* Compute MD5 message digest for bytes read from STREAM. The + resulting message digest number will be written into the 16 bytes + beginning at RESBLOCK. */ +extern int md5_stream __P ((FILE *stream, void *resblock)); + +/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The + result is always in little endian byte order, so that a byte-wise + output yields to the wanted ASCII representation of the message + digest. */ +extern void *md5_buffer __P ((const char *buffer, size_t len, + void *resblock)); + +#endif /* md5.h */ diff --git a/src/nix-addroot b/src/nix-addroot new file mode 100755 index 000000000..3ab9e8a25 --- /dev/null +++ b/src/nix-addroot @@ -0,0 +1,18 @@ +#! /bin/sh + +ROOTLIST=~/.nixroots + +if ! 
test -f $ROOTLIST; then + touch $ROOTLIST +fi + +for i in $*; do + if nix ensure $i > /dev/null; then + if grep -q $i $ROOTLIST; then + echo $i already is a root + else + echo adding root $i + echo $i >> $ROOTLIST + fi + fi +done diff --git a/src/nix.cc b/src/nix.cc index d53a809b7..4ff49eabd 100644 --- a/src/nix.cc +++ b/src/nix.cc @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -14,6 +16,10 @@ #include +extern "C" { +#include "md5.h" +} + using namespace std; @@ -146,8 +152,20 @@ void enumDB(const string & dbname, DBPairs & contents) } +string printHash(unsigned char * buf) +{ + ostringstream str; + for (int i = 0; i < 16; i++) { + str.fill('0'); + str.width(2); + str << hex << (int) buf[i]; + } + return str.str(); +} + + /* Verify that a reference is valid (that is, is a MD5 hash code). */ -void checkRef(const string & s) +void checkHash(const string & s) { string err = "invalid reference: " + s; if (s.length() != 32) @@ -162,31 +180,36 @@ void checkRef(const string & s) /* Compute the MD5 hash of a file. 
*/ -string makeRef(string filename) +string hashFile(string filename) { - char hash[33]; - - FILE * pipe = popen(("md5sum " + filename + " 2> /dev/null").c_str(), "r"); - if (!pipe) throw BadRefError("cannot execute md5sum"); - - if (fread(hash, 32, 1, pipe) != 1) - throw BadRefError("cannot read hash from md5sum of " + filename); - hash[32] = 0; - - pclose(pipe); - - checkRef(hash); - return hash; + unsigned char hash[16]; + FILE * file = fopen(filename.c_str(), "rb"); + if (!file) + throw BadRefError("file `" + filename + "' does not exist"); + int err = md5_stream(file, hash); + fclose(file); + if (err) throw BadRefError("cannot hash file"); + return printHash(hash); } -typedef pair<string, string> Param; -typedef list<Param> Params; +typedef map<string, string> Params; -void readPkgDescr(const string & pkgfile, +void readPkgDescr(const string & hash, Params & pkgImports, Params & fileImports, Params & arguments) { + string pkgfile; + + if (!queryDB(dbRefs, hash, pkgfile)) + throw Error("unknown package " + hash); + + // cerr << "reading information about " + hash + " from " + pkgfile + "\n"; + + /* Verify that the file hasn't changed. !!!
race */ + if (hashFile(pkgfile) != hash) + throw Error("file " + pkgfile + " is stale"); + ifstream file; file.exceptions(ios::badbit); file.open(pkgfile.c_str()); @@ -206,13 +229,13 @@ void readPkgDescr(const string & pkgfile, str >> name >> op >> ref; if (op == "<-") { - checkRef(ref); - pkgImports.push_back(Param(name, ref)); + checkHash(ref); + pkgImports[name] = ref; } else if (op == "=") { - checkRef(ref); - fileImports.push_back(Param(name, ref)); + checkHash(ref); + fileImports[name] = ref; } else if (op == ":") - arguments.push_back(Param(name, ref)); + arguments[name] = ref; else throw Error("invalid operator " + op); } } @@ -226,20 +249,9 @@ typedef map Environment; void fetchDeps(string hash, Environment & env) { - string pkgfile; - - if (!queryDB(dbRefs, hash, pkgfile)) - throw Error("unknown package " + hash); - - cerr << "reading information about " + hash + " from " + pkgfile + "\n"; - - /* Verify that the file hasn't changed. !!! race */ - if (makeRef(pkgfile) != hash) - throw Error("file " + pkgfile + " is stale"); - /* Read the package description file. */ Params pkgImports, fileImports, arguments; - readPkgDescr(pkgfile, pkgImports, fileImports, arguments); + readPkgDescr(hash, pkgImports, fileImports, arguments); /* Recursively fetch all the dependencies, filling in the environment as we go along. 
*/ @@ -264,7 +276,7 @@ void fetchDeps(string hash, Environment & env) if (!queryDB(dbRefs, it->second, file)) throw Error("unknown file " + it->second); - if (makeRef(file) != it->second) + if (hashFile(file) != it->second) throw Error("file " + file + " is stale"); env[it->first] = file; @@ -374,7 +386,7 @@ void installPkg(string hash) string getPkg(string hash) { string path; - checkRef(hash); + checkHash(hash); while (!queryDB(dbInstPkgs, hash, path)) installPkg(hash); return path; @@ -434,6 +446,20 @@ void runPkg(string hash) } +void ensurePkg(string hash) +{ + Params pkgImports, fileImports, arguments; + readPkgDescr(hash, pkgImports, fileImports, arguments); + + if (fileImports.find("build") != fileImports.end()) + getPkg(hash); + else if (fileImports.find("run") != fileImports.end()) { + Environment env; + fetchDeps(hash, env); + } else throw Error("invalid descriptor"); +} + + string absPath(string filename) { if (filename[0] != '/') { @@ -450,14 +476,14 @@ string absPath(string filename) void registerFile(string filename) { filename = absPath(filename); - setDB(dbRefs, makeRef(filename), filename); + setDB(dbRefs, hashFile(filename), filename); } /* This is primarily used for bootstrapping. */ void registerInstalledPkg(string hash, string path) { - checkRef(hash); + checkHash(hash); if (path == "") delDB(dbInstPkgs, hash); else @@ -483,8 +509,10 @@ void verifyDB() it != fileRefs.end(); it++) { try { - if (makeRef(it->second) != it->first) + if (hashFile(it->second) != it->first) { + cerr << "file " << it->second << " has changed\n"; delDB(dbRefs, it->first); + } } catch (BadRefError e) { /* !!! 
better error check */ cerr << "file " << it->second << " has disappeared\n"; delDB(dbRefs, it->first); @@ -519,48 +547,136 @@ void listInstalledPkgs() for (DBPairs::iterator it = instPkgs.begin(); it != instPkgs.end(); it++) + cout << it->first << endl; +} + + +void printInfo(vector<string> hashes) +{ + for (vector<string>::iterator it = hashes.begin(); + it != hashes.end(); it++) { - string descr; - if (!queryDB(dbRefs, it->first, descr)) - descr = "descriptor missing"; - cout << it->first << " " << descr << endl; + try { + Params pkgImports, fileImports, arguments; + readPkgDescr(*it, pkgImports, fileImports, arguments); + cout << *it << " " << getFromEnv(arguments, "id") << endl; + } catch (Error & e) { + cout << *it << " (descriptor missing)\n"; + } } } -void run(int argc, char * * argv) +void computeClosure(const vector<string> & rootHashes, + set<string> & result) +{ + list<string> workList(rootHashes.begin(), rootHashes.end()); + set<string> doneSet; + + while (!workList.empty()) { + string hash = workList.front(); + workList.pop_front(); + + if (doneSet.find(hash) == doneSet.end()) { + doneSet.insert(hash); + + Params pkgImports, fileImports, arguments; + readPkgDescr(hash, pkgImports, fileImports, arguments); + + for (Params::iterator it = pkgImports.begin(); + it != pkgImports.end(); it++) + workList.push_back(it->second); + } + } + + result = doneSet; +} + + +void printClosure(const vector<string> & rootHashes) +{ + set<string> allHashes; + computeClosure(rootHashes, allHashes); + for (set<string>::iterator it = allHashes.begin(); + it != allHashes.end(); it++) + cout << *it << endl; +} + + +string dotQuote(const string & s) +{ + return "\"" + s + "\""; +} + + +void printGraph(vector<string> rootHashes) +{ + set<string> allHashes; + computeClosure(rootHashes, allHashes); + + cout << "digraph G {\n"; + + for (set<string>::iterator it = allHashes.begin(); + it != allHashes.end(); it++) + { + Params pkgImports, fileImports, arguments; + readPkgDescr(*it, pkgImports, fileImports, arguments); + + cout << dotQuote(*it) << "[label = \"" + <<
getFromEnv(arguments, "id") + << "\"];\n"; + + for (Params::iterator it2 = pkgImports.begin(); + it2 != pkgImports.end(); it2++) + cout << dotQuote(it2->second) << " -> " + << dotQuote(*it) << ";\n"; + } + + cout << "}\n"; +} + + +void run(vector<string> args) { UsageError argcError("wrong number of arguments"); string cmd; - if (argc < 1) - throw UsageError("no command specified"); - - cmd = argv[0]; - argc--, argv++; + if (args.size() < 1) throw UsageError("no command specified"); + + cmd = args[0]; + args.erase(args.begin()); // O(n) if (cmd == "init") { - if (argc != 0) throw argcError; + if (args.size() != 0) throw argcError; initDB(); } else if (cmd == "verify") { - if (argc != 0) throw argcError; + if (args.size() != 0) throw argcError; verifyDB(); } else if (cmd == "getpkg") { - if (argc != 1) throw argcError; - string path = getPkg(argv[0]); + if (args.size() != 1) throw argcError; + string path = getPkg(args[0]); cout << path << endl; } else if (cmd == "run") { - if (argc != 1) throw argcError; - runPkg(argv[0]); + if (args.size() != 1) throw argcError; + runPkg(args[0]); + } else if (cmd == "ensure") { + if (args.size() != 1) throw argcError; + ensurePkg(args[0]); } else if (cmd == "regfile") { - if (argc != 1) throw argcError; - registerFile(argv[0]); + if (args.size() != 1) throw argcError; + registerFile(args[0]); } else if (cmd == "reginst") { - if (argc != 2) throw argcError; - registerInstalledPkg(argv[0], argv[1]); + if (args.size() != 2) throw argcError; + registerInstalledPkg(args[0], args[1]); } else if (cmd == "listinst") { - if (argc != 0) throw argcError; + if (args.size() != 0) throw argcError; listInstalledPkgs(); + } else if (cmd == "info") { + printInfo(args); + } else if (cmd == "closure") { + printClosure(args); + } else if (cmd == "graph") { + printGraph(args); } else throw UsageError("unknown command: " + string(cmd)); } @@ -594,6 +710,20 @@ Subcommands: run HASH Run the descriptor referenced by HASH.
+ + ensure HASH + Like getpkg, but if HASH refers to a run descriptor, fetch only + the dependencies. + + info HASH... + Print information about the specified descriptors. + + closure HASH... + Determine the closure of the set of descriptors under the import + relation, starting at the given roots. + + graph HASH... + Like closure, but print a dot graph specification. "; } @@ -630,11 +760,13 @@ void main2(int argc, char * * argv) } } - argc -= optind, argv += optind; - run(argc, argv); + vector<string> args; + argc--, argv++; + while (argc--) args.push_back(*argv++); + run(args); } - + int main(int argc, char * * argv) { prog = argv[0];