2 # Convert Contents.gz files into Sleepycat db files for efficient usage of
7 # Copyright (C) 2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # Important, we want sorting and such to happen like in the C locale: binary,
28 # without any fancy collation. FIXME: is this actually adequate?
31 my $what = $ARGV[0] ? "head -10000|" : "";
33 # More RAM vs more disk I/O tradeoff parameters, does not change
34 # functionality. True will always use more RAM at the benefit of less
35 # temporary files, and is adviced when possible
36 my $SORT_REVERSE_CONCURRENTLY = 1;
41 use Packages::Config qw( $TOPDIR $DBDIR @ARCHIVES @SUITES @ARCHITECTURES );
42 &Packages::Config::init( './' );
44 my @archives = @ARCHIVES;
46 my @archs = @ARCHITECTURES;
48 $DBDIR .= "/contents";
49 -d $DBDIR || mkpath( $DBDIR );
51 for my $suite (@suites) {
52 for my $arch (@archs) {
54 my $filelist_db = "$DBDIR/filelists_${suite}_${arch}.db";
55 my $dbtime = (stat $filelist_db)[9];
56 my %packages_contents = ();
57 my %packages_contents_nr = ();
58 my %packages_contents_lastword = ();
61 $extra = "|sort" if $SORT_REVERSE_CONCURRENTLY;
63 open REVERSED, "$extra>$DBDIR/reverse.tmp"
64 or die "Failed to open output reverse file: $!";
67 for my $archive (@archives) {
69 my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz";
70 next unless -f $filename;
71 # Note: ctime, because mtime is set back via rsync
72 my $ftime = (stat $filename)[10];
73 next if defined $dbtime and $dbtime > $ftime;
74 print "$archive/$suite/$arch needs update\n";
78 for my $archive (@archives) {
80 my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz";
81 next unless -f $filename;
82 print "Reading $archive/$suite/$arch...\n";
84 open CONT, "zcat $filename|$what"
86 while (<CONT>) {last if /^FILE/mo;}
87 open CONT, "zcat $filename|$what" if eof(CONT);
92 print "Doing line ".($./1000)."k (out of approx 1.5M)\n" if $. % 250000 == 0;
94 my ($file, $value) = ($1, $2);
95 $value =~ s#[^,/]+/##og;
96 my @packages = split /,/, $value;
98 $packages_contents_nr{$_}++;
99 my $lw = $packages_contents_lastword{$_} || "\0";
101 while (substr($file,$i,1) eq substr($lw,$i++,1)) {}
103 $i = 255 if $i > 255;
104 $packages_contents{$_} .= pack "CC/a*", ($i, substr($file, $i));
105 $packages_contents_lastword{$_} = "$file\0";
107 # Searches are case-insensitive
108 $file =~ tr [A-Z] [a-z];
110 print REVERSED (reverse $file)."\0".(join ":$arch\0", @packages).":$arch\n";
117 print "Sorting reverse list if needed\n";
118 system("cd $DBDIR && sort reverse.tmp > reverse.sorted && mv reverse.{sorted,tmp}") == 0
119 or die "Failed to sort reverse"
120 unless $SORT_REVERSE_CONCURRENTLY;
122 print "Writing filelist db\n";
123 tie my %packages_contents_db, "DB_File", "$filelist_db.new",
124 O_RDWR|O_CREAT, 0666, $DB_BTREE
125 or die "Error creating DB: $!";
126 while (my ($k, $v) = each(%packages_contents)) {
127 $packages_contents_db{$k} = (pack "L", $packages_contents_nr{$k})
130 untie %packages_contents_db;
132 rename("$DBDIR/reverse.tmp", "$DBDIR/reverse_${suite}_${arch}.txt");
134 rename("$filelist_db.new", $filelist_db);
135 system("ln -sf $filelist_db $DBDIR/filelists_${suite}_all.db") == 0
141 my $suite_mtime = (stat "$DBDIR/reverse_$suite.db")[9];
142 for my $file (glob "$DBDIR/reverse_${suite}_*.txt") {
143 $go = 1 if not defined $suite_mtime
144 or $suite_mtime < (stat $file)[9];
148 print "Merging reverse path lists for ${suite}...\n";
150 open MERGED, "sort -m $DBDIR/reverse_${suite}_*.txt |"
151 or die "Failed to open merged list";
152 open FILENAMES, "> $DBDIR/filenames_$suite.txt.new"
153 or die "Failed to open filenames list";
154 tie my %reverse_path_db, "DB_File", "$DBDIR/reverse_${suite}.db.new",
155 O_RDWR|O_CREAT, 0666, $DB_BTREE
156 or die "Error creating DB: $!";
162 print "Doing line ".($./1000000)."M (out of approx. 16M)\n"
163 if $. % 1000000 == 0;
165 my @line = split /\0/o, $_;
166 my $revpath = shift @line;
167 if ($revpath ne $lastpath) {
168 # Wrap: Do useful stuff with this ($lastpath, @matches)
169 $reverse_path_db{$lastpath} = join "\0", @matches if $lastpath ne "";
170 $lastpath =~ s,/.*,,o;
171 if ($lastfile ne $lastpath) {
172 $lastfile = $lastpath;
173 print FILENAMES (reverse $lastfile)."\n";
176 $lastpath = $revpath;
182 # Note: do useful stuff here too, for out last entry. Maybe prevent this by
183 # adding a fake ultimate entry?
184 $reverse_path_db{$lastpath} = join "\0", @matches;
186 untie %reverse_path_db;
190 rename "$DBDIR/filenames_$suite.txt.new", "$DBDIR/filenames_$suite.txt";
191 rename "$DBDIR/reverse_$suite.db.new", "$DBDIR/reverse_$suite.db";