* `show-duplication.pl', a small utility that shows the amount of
  package duplication present in (e.g.) a profile.  It shows the
  number of instances of each package in a closure, along with the
  size in bytes of each instance as well as the "waste" (the
  difference between the sum of the sizes of all instances and the
  average size).

  $ ./show-duplication.pl /nix/var/nix/profiles/default
  gcc 11
    3.3.6 19293318
    3.4.4 21425257
    ...
    average 14942970, waste 149429707
  coreutils 6
  ...
  average package duplication 1.87628865979381, total size 3486330471, total waste 1335324237, 38.3017114443825% wasted

  This utility is useful for measuring the cost in terms of disk space
  of the Nix approach.
This commit is contained in:
Eelco Dolstra 2006-09-19 13:53:35 +00:00
parent e0afaf1857
commit 9488ae7357
2 changed files with 73 additions and 1 deletions

View file

@ -7,7 +7,6 @@ use strict;
select $ofh; select $ofh;
} }
#my @paths = ("/nix/store/caef3a49150506d233f474322a824e50-glibc-2.3.3", "/nix/store/a8a9d585d1ad4b1bc911be7743b3b996-glibc-2.3.3");
my @paths = ("/nix/store"); my @paths = ("/nix/store");
my $tmpfile = "/tmp/nix-optimise-hash-list"; my $tmpfile = "/tmp/nix-optimise-hash-list";

73
scripts/show-duplication.pl Executable file
View file

@ -0,0 +1,73 @@
#! /usr/bin/perl -w
# Exactly one argument is required: the path (e.g. a profile) that is
# handed to `nix-store -qR' below.
if (scalar @ARGV != 1) {
    print "syntax: show-duplication.pl PATH\n";
    exit 1;
}

my $root = $ARGV[0];

# After the hash prefix, a store path's base name is split into
# NAME[-VERSION].  A name is a run of alphanumerics, `+', `_', or a
# dash followed by a non-digit; the version is the part after the
# first dash that is followed by a digit.
my $nameRE = "(?:(?:[A-Za-z0-9\+\_]|(?:-[^0-9]))+)";
my $versionRE = "(?:[A-Za-z0-9\.\-]+)";

# Maps a package name to a list of {version, path} records, one for
# every path printed by nix-store that carries that name.
my %pkgInstances;

# The list form of a pipe open runs nix-store without a shell and
# reports a failed fork or exec here, in the parent.  (The previous
# `open(PATHS, "-|") || exec ...' exec'ed in the parent when fork
# failed and let the child run on into the rest of the script when
# exec failed.)
open(my $paths, "-|", "nix-store", "-qR", $root)
    or die "cannot run nix-store: $!\n";

while (my $path = <$paths>) {
    chomp $path;

    # Drop the directory part and the leading hash, keeping only
    # NAME[-VERSION].
    my ($nameVersion) = $path =~ /^.*\/[0-9a-z]*-(.*)$/;
    unless (defined $nameVersion) {
        warn "skipping unrecognised path: $path\n";
        next;
    }

    my ($name, $version);
    if ($nameVersion =~ /^($nameRE)(-($versionRE))?$/) {
        ($name, $version) = ($1, $3);
    }
    else {
        # Names containing characters outside $nameRE (e.g. a dot)
        # do not split; treat the whole string as the package name.
        $name = $nameVersion;
    }
    $version = "(unnumbered)" unless defined $version;

    # print "$nameVersion $name $version\n";
    push @{$pkgInstances{$name}}, {version => $version, path => $path};
}

# Closing a pipe handle waits for the child; a non-zero exit status
# from nix-store makes close() return false.
close $paths or exit 1;
# Return the size in bytes of $path: the lstat() size of the entry
# itself plus, for a real directory, the recursively computed size of
# everything inside it.  Symbolic links are counted as links and are
# never followed.  Dies if an entry cannot be lstat'ed or a directory
# cannot be opened.
sub pathSize {
    my $path = shift;

    my @st = lstat($path) or die "cannot lstat $path: $!\n";
    my $size = $st[7];

    # `_' reuses the lstat() result above.  Testing `-d $path' would
    # stat() through a symlink, so a link to a directory would be
    # descended into and its target's contents counted (or looped over
    # forever if the link is cyclic).
    if (-d _) {
        # A lexical handle keeps each recursion level's directory
        # separate and lets us close it explicitly.
        opendir(my $dir, $path) or die "cannot open directory $path: $!\n";
        my @entries = grep { $_ ne "." && $_ ne ".." } readdir $dir;
        closedir $dir;

        foreach my $name (@entries) {
            $size += pathSize("$path/$name");
        }
    }

    return $size;
}
# Running totals over all packages.  Each variable gets its own `my':
# in `my $totalSize = 0, $totalWaste = 0;' only the first one is
# declared and the second silently becomes a package global.
my $totalPaths = 0;
my $totalSize = 0;
my $totalWaste = 0;

# List packages with the most instances first.  Ties are broken by
# name so that the output order is reproducible.
my @names = sort {
    scalar @{$pkgInstances{$b}} <=> scalar @{$pkgInstances{$a}}
        or $a cmp $b
} keys %pkgInstances;

foreach my $name (@names) {
    my @instances = @{$pkgInstances{$name}};
    print "$name ", scalar @instances, "\n";

    my $allSize = 0;
    foreach my $x (sort { $a->{version} cmp $b->{version} } @instances) {
        $totalPaths++;
        my $size = pathSize($x->{path});
        $allSize += $size;
        print " $x->{version} $size\n";
    }

    # "Waste" is everything beyond one average-sized instance of the
    # package.
    my $avgSize = int($allSize / scalar @instances);
    my $waste = $allSize - $avgSize;
    $totalSize += $allSize;
    $totalWaste += $waste;
    print " average $avgSize, waste $waste\n";
}

# Guard both divisions: with no packages at all, or with a total size
# of zero, the script used to die with "Illegal division by zero".
my $numPackages = scalar(keys %pkgInstances);
my $avgDupl = $numPackages ? $totalPaths / $numPackages : 0;
my $wasteFactor = $totalSize ? ($totalWaste / $totalSize) * 100 : 0;
print "average package duplication $avgDupl, total size $totalSize, total waste $totalWaste, $wasteFactor% wasted\n";