2 # Convert Contents.gz files into Sleepycat db files for efficient usage of
5 # Copyright (C) 2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 # Important, we want sorting and such to happen like in the C locale: binary,
27 # without any fancy collation. FIXME: is this actually adequate?
30 my $what = $ARGV[0] ? "head -10000|" : "";
32 # More RAM vs more disk I/O tradeoff parameters, does not change
33 # functionality. True will always use more RAM at the benefit of less
34 # temporary files, and is adviced when possible
35 my $SORT_REVERSE_CONCURRENTLY = 1;
42 use Packages::CommonCode qw(:all);
43 use Packages::Config qw( $TOPDIR $DBDIR @ARCHIVES @SUITES @ARCHITECTURES );
44 &Packages::Config::init( './' );
46 my @archives = @ARCHIVES;
48 my @archs = @ARCHITECTURES;
50 $DBDIR .= "/contents";
53 for my $suite (@suites) {
54 for my $arch (@archs) {
56 my $filelist_db = "$DBDIR/filelists_${suite}_${arch}.db";
57 my $dbtime = (stat $filelist_db)[9];
58 my %packages_contents = ();
59 my %packages_contents_nr = ();
60 my %packages_contents_lastword = ();
63 $extra = "|sort" if $SORT_REVERSE_CONCURRENTLY;
65 open REVERSED, "$extra>$DBDIR/reverse.tmp"
66 or die "Failed to open output reverse file: $!";
69 for my $archive (@archives) {
71 my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz";
72 next unless -f $filename;
73 # Note: ctime, because mtime is set back via rsync
74 my $ftime = (stat $filename)[10];
75 next if defined $dbtime and $dbtime > $ftime;
76 print "$archive/$suite/$arch needs update\n";
80 for my $archive (@archives) {
82 my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz";
83 next unless -f $filename;
84 print "Reading $archive/$suite/$arch...\n";
86 open CONT, "zcat $filename|$what"
89 while (<CONT>) { /^FILE/mo && do { $header_found = 1; last };}
90 if (eof(CONT)) { # no header found or only header found
91 close CONT; # explicit close to reset $.
92 next if $header_found;
93 open CONT, "zcat $filename|$what";
99 print "Doing line ".($NR/1000)."k (out of approx 2.0M)\n"
100 if $NR % 250000 == 0;
102 my ($file, $value) = ($1, $2);
103 $value =~ s#[^,/]+/##og;
104 my @packages = split m/,/, $value;
106 $packages_contents_nr{$_}++;
107 my $lw = $packages_contents_lastword{$_} || "\0";
109 while (substr($file,$i,1) eq substr($lw,$i++,1)) {}
111 $i = 255 if $i > 255;
112 $packages_contents{$_} .= pack "CC/a*", ($i, substr($file, $i));
113 $packages_contents_lastword{$_} = "$file\0";
115 # Searches are case-insensitive
116 (my $nocase = $file) =~ tr [A-Z] [a-z];
117 my $case = ($nocase eq $file) ? '-' : $file;
119 print REVERSED (reverse $nocase)."\0".$case."\0".
120 (join ":$arch\0", @packages).":$arch\n";
127 print "Sorting reverse list if needed\n";
128 system("cd $DBDIR && sort reverse.tmp > reverse.sorted &&".
129 " mv reverse.{sorted,tmp}") == 0
130 or die "Failed to sort reverse"
131 unless $SORT_REVERSE_CONCURRENTLY;
133 print "Writing filelist db\n";
134 tie my %packages_contents_db, "DB_File", "$filelist_db.new",
135 O_RDWR|O_CREAT, 0666, $DB_BTREE
136 or die "Error creating DB: $!";
137 while (my ($k, $v) = each(%packages_contents)) {
138 $packages_contents_db{$k} = (pack "L", $packages_contents_nr{$k})
141 untie %packages_contents_db;
143 rename("$DBDIR/reverse.tmp", "$DBDIR/reverse_${suite}_${arch}.txt");
145 activate($filelist_db);
146 #FIXME: hardcoded archs. (debports has no contrib/non-free)
147 if ($arch !~ m/^kfreebsd-.*$/) {
148 system("ln", "-sf", basename($filelist_db),
149 "$DBDIR/filelists_${suite}_all.db") == 0
156 my $suite_mtime = (stat "$DBDIR/reverse_$suite.db")[9];
157 for my $file (glob "$DBDIR/reverse_${suite}_*.txt") {
158 $go = 1 if not defined $suite_mtime
159 or $suite_mtime < (stat $file)[9];
163 print "Merging reverse path lists for ${suite}...\n";
165 open MERGED, "-|", "sort -m $DBDIR/reverse_${suite}_*.txt"
166 or die "Failed to open merged list";
167 open FILENAMES, ">", "$DBDIR/filenames_$suite.txt.new"
168 or die "Failed to open filenames list";
169 tie my %reverse_path_db, "DB_File", "$DBDIR/reverse_${suite}.db.new",
170 O_RDWR|O_CREAT, 0666, $DB_BTREE
171 or die "Error creating DB: $!";
173 my $lastpath = my $lastcasepath = my $lastfile = "";
176 print "Doing line ".($NR/1000000)."M (out of approx. 20M)\n"
177 if $NR % 1000000 == 0;
179 my @line = split m/\0/o, $_;
180 my $revpath = shift @line;
181 my $casepath = shift @line;
182 if ($revpath ne $lastpath) {
183 # Wrap: Do useful stuff with this ($lastpath, @matches)
184 if ($lastpath ne "") {
186 while (my ($k, $v) = each %matches) {
187 push @matches, join("\0", $k, @$v);
189 $reverse_path_db{$lastpath} = join "\1", @matches;
192 $lastpath =~ s,/.*,,o;
193 if ($lastfile ne $lastpath) {
194 $lastfile = $lastpath;
195 print FILENAMES (reverse $lastfile)."\n";
198 $lastpath = $revpath;
199 $lastcasepath = $casepath;
200 $matches{$casepath} = \@line;
202 # } elsif ($lastcasepath ne "" and $casepath ne $lastcasepath) {
203 # warn reverse($revpath)." has more than one casepath: $casepath $lastcasepath\n";
205 push @{$matches{$casepath}}, @line;
207 # Note: do useful stuff here too, for out last entry. Maybe prevent this by
208 # adding a fake ultimate entry?
211 while (my ($k, $v) = each %matches) {
212 push @matches, join("\0", $k, @$v);
214 $reverse_path_db{$lastpath} = join "\1", @matches;
217 untie %reverse_path_db;
221 activate("$DBDIR/filenames_$suite.txt");
222 activate("$DBDIR/reverse_$suite.db");