* Cache the manifests in /nix/var/nix/manifests in a SQLite database.

This significantly speeds up the download-using-manifests
  substituter, especially if manifests are very large.  For instance,
  one "nix-build -A geeqie" operation that updated four packages using
  binary patches went from 18.5s to 1.6s.  It also significantly
  reduces memory use.

  The cache is kept in /nix/var/nix/manifests/cache.sqlite.  It's
  updated automatically when manifests are added to or removed from
  /nix/var/nix/manifests.  It might be interesting to have nix-pull
  store manifests directly in the DB, rather than storing them as
  separate flat files, but then we would need a command line interface
  to delete manifests from the DB.
This commit is contained in:
Eelco Dolstra 2011-04-10 23:22:46 +00:00
parent 1e7e4f21ba
commit 5591fcc529
2 changed files with 164 additions and 30 deletions

View file

@ -1,4 +1,7 @@
use strict;
use DBI;
use Cwd;
use File::stat;
sub addPatch {
@ -34,7 +37,7 @@ sub readManifest {
my $manifestVersion = 2;
my ($storePath, $url, $hash, $size, $basePath, $baseHash, $patchType);
my ($narHash, $narSize, $references, $deriver, $hashAlgo, $copyFrom, $system);
my ($narHash, $narSize, $references, $deriver, $copyFrom, $system);
while (<MANIFEST>) {
chomp;
@ -59,7 +62,6 @@ sub readManifest {
undef $system;
$references = "";
$deriver = "";
$hashAlgo = "md5";
}
} else {
@ -83,7 +85,7 @@ sub readManifest {
{ url => $url, hash => $hash, size => $size
, narHash => $narHash, narSize => $narSize
, references => $references
, deriver => $deriver, hashAlgo => $hashAlgo
, deriver => $deriver
, system => $system
};
}
@ -95,7 +97,7 @@ sub readManifest {
{ url => $url, hash => $hash, size => $size
, basePath => $basePath, baseHash => $baseHash
, narHash => $narHash, narSize => $narSize
, patchType => $patchType, hashAlgo => $hashAlgo
, patchType => $patchType
};
}
@ -193,4 +195,136 @@ sub writeManifest {
}
sub updateManifestDB {
my $manifestDir = ($ENV{"NIX_MANIFESTS_DIR"} or "@localstatedir@/nix/manifests");
my $dbPath = "$manifestDir/cache.sqlite";
# Open/create the database.
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbPath", "", "")
or die "cannot open database `$dbPath'";
$dbh->{AutoCommit} = 0;
$dbh->{RaiseError} = 1;
$dbh->{PrintError} = 0;
$dbh->do("pragma foreign_keys = on");
# Initialise the database schema, if necessary.
$dbh->do(<<EOF);
create table if not exists Manifests (
id integer primary key autoincrement not null,
path text unique not null,
timestamp integer not null
);
EOF
$dbh->do(<<EOF);
create table if not exists NARs (
id integer primary key autoincrement not null,
manifest integer not null,
storePath text not null,
url text not null,
hash text,
size integer,
narHash text,
narSize integer,
refs text,
deriver text,
system text,
foreign key (manifest) references Manifests(id) on delete cascade
);
EOF
$dbh->do("create index if not exists NARs_storePath on NARs(storePath)");
$dbh->do(<<EOF);
create table if not exists Patches (
id integer primary key autoincrement not null,
manifest integer not null,
storePath text not null,
basePath text not null,
baseHash text not null,
url text not null,
hash text,
size integer,
narHash text,
narSize integer,
patchType text not null,
foreign key (manifest) references Manifests(id) on delete cascade
);
EOF
$dbh->do("create index if not exists Patches_storePath on Patches(storePath)");
# !!! locking?
# Read each manifest in $manifestDir and add it to the database,
# unless we've already done so on a previous run.
my %seen;
for my $manifest (glob "$manifestDir/*.nixmanifest") {
$manifest = Cwd::abs_path($manifest);
my $timestamp = lstat($manifest)->mtime;
$seen{$manifest} = 1;
next if scalar @{$dbh->selectcol_arrayref(
"select 1 from Manifests where path = ? and timestamp = ?",
{}, $manifest, $timestamp)} == 1;
# !!! Insert directly into the DB.
my %narFiles;
my %patches;
my $version = readManifest($manifest, \%narFiles, \%patches);
if ($version < 3) {
die "you have an old-style manifest `$manifest'; please delete it";
}
if ($version >= 10) {
die "manifest `$manifest' is too new; please delete it or upgrade Nix";
}
$dbh->do("delete from Manifests where path = ?", {}, $manifest);
$dbh->do("insert into Manifests(path, timestamp) values (?, ?)",
{}, $manifest, $timestamp);
my $id = $dbh->sqlite_last_insert_rowid();
foreach my $storePath (keys %narFiles) {
my $narFileList = $narFiles{$storePath};
foreach my $narFile (@{$narFiles{$storePath}}) {
$dbh->do(
"insert into NARs(manifest, storePath, url, hash, size, narHash, " .
"narSize, refs, deriver, system) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
{}, $id, $storePath, $narFile->{url}, $narFile->{hash}, $narFile->{size},
$narFile->{narHash}, $narFile->{narSize}, $narFile->{references},
$narFile->{deriver}, $narFile->{system});
}
}
foreach my $storePath (keys %patches) {
my $patchList = $patches{$storePath};
foreach my $patch (@{$patchList}) {
$dbh->do(
"insert into Patches(manifest, storePath, basePath, baseHash, url, hash, " .
"size, narHash, narSize, patchType) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
{}, $id, $storePath, $patch->{basePath}, $patch->{baseHash}, $patch->{url},
$patch->{hash}, $patch->{size}, $patch->{narHash}, $patch->{narSize},
$patch->{patchType});
}
}
}
# Removed cached information for removed manifests from the DB.
foreach my $manifest (@{$dbh->selectcol_arrayref("select path from Manifests")}) {
next if defined $seen{$manifest};
$dbh->do("delete from Manifests where path = ?", {}, $manifest);
}
$dbh->commit;
return $dbh;
}
return 1;

View file

@ -17,21 +17,8 @@ my $logFile = "@localstatedir@/log/nix/downloads";
my $fast = 1;
# Load all manifests.
my %narFiles;
my %patches;
for my $manifest (glob "$manifestDir/*.nixmanifest") {
my $version = readManifest($manifest, \%narFiles, \%patches);
if ($version < 3) {
print STDERR "you have an old-style manifest `$manifest'; please delete it\n";
exit 1;
}
if ($version >= 10) {
print STDERR "manifest `$manifest' is too new; please delete it or upgrade Nix\n";
exit 1;
}
}
# Open the manifest cache and update it if necessary.
my $dbh = updateManifestDB();
sub isValidPath {
@ -110,7 +97,10 @@ sub computeSmallestDownload {
else {
# Add patch edges.
my $patchList = $patches{$u};
my $patchList = $dbh->selectall_arrayref(
"select * from Patches where storePath = ?",
{ Slice => {} }, $u);
foreach my $patch (@{$patchList}) {
if (isValidPath($patch->{basePath})) {
# !!! this should be cached
@ -129,11 +119,15 @@ sub computeSmallestDownload {
}
# Add NAR file edges to the start node.
my $narFileList = $narFiles{$u};
my $narFileList = $dbh->selectall_arrayref(
"select * from NARs where storePath = ?",
{ Slice => {} }, $u);
foreach my $narFile (@{$narFileList}) {
# !!! how to handle files whose size is not known in advance?
# For now, assume some arbitrary size (1 MB).
addEdge \%graph, "start", $u, ($narFile->{size} || 1000000), "narfile", $narFile;
# For now, assume some arbitrary size (1 GB).
# This has the side-effect of preferring non-Hydra downloads.
addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile;
}
}
}
@ -160,7 +154,7 @@ sub computeSmallestDownload {
$v_->{d} = $u_->{d} + $edge->{weight};
# Store the edge; to edge->start is actually the
# predecessor.
$v_->{pred} = $edge;
$v_->{pred} = $edge;
}
}
}
@ -188,15 +182,21 @@ if ($ARGV[0] eq "--query") {
if ($cmd eq "have") {
my $storePath = <STDIN>; chomp $storePath;
print STDOUT (defined $narFiles{$storePath} ? "1\n" : "0\n");
print STDOUT (
scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0
? "1\n" : "0\n");
}
elsif ($cmd eq "info") {
my $storePath = <STDIN>; chomp $storePath;
my $infos = $dbh->selectall_arrayref(
"select * from NARs where storePath = ?",
{ Slice => {} }, $storePath);
my $info;
if (defined $narFiles{$storePath}) {
$info = @{$narFiles{$storePath}}[0];
if (scalar @{$infos} > 0) {
$info = @{$infos}[0];
}
else {
print "0\n";
@ -205,12 +205,12 @@ if ($ARGV[0] eq "--query") {
print "1\n";
print "$info->{deriver}\n";
my @references = split " ", $info->{references};
my @references = split " ", $info->{refs};
print scalar @references, "\n";
print "$_\n" foreach @references;
my @path = computeSmallestDownload $storePath;
my $downloadSize = 0;
while (scalar @path > 0) {
my $edge = pop @path;
@ -223,7 +223,7 @@ if ($ARGV[0] eq "--query") {
$downloadSize += $edge->{info}->{size} || 0;
}
}
print "$downloadSize\n";
my $narSize = $info->{narSize} || 0;