#!/usr/bin/perl -w # Convert Contents.gz files into Sleepycat db files for efficient usage of # data # # $Id$ # # Copyright (C) 2006 Jeroen van Wolffelaar # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA use strict; use lib './lib'; my $what = $ARGV[0] ? "head -10000|" : ""; use DB_File; use Storable; use Packages::Config qw( $TOPDIR $DBDIR @ARCHIVES @SUITES @ARCHITECTURES ); &Packages::Config::init( './' ); my %filenames = (); my @archives =( 'us'); #@ARCHIVES # NOT-IMPLEMENTED-YET my @suites = @SUITES; my @archs = @ARCHITECTURES; for my $archive (@archives) { for my $suite (@suites) { for my $arch (@archs) { my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz"; my $db = "$DBDIR/packages_contents_${suite}_${arch}.db"; next unless -f $filename; my $ftime = (stat $filename)[9]; my $dbtime = (stat $db)[9]; next unless $ftime > $dbtime; print "Reading $archive/$suite/$arch...\n"; my %packages_contents = (); my %packages_contents_nr = (); my %packages_contents_lastword = (); my %contents_packages_reverse = (); open CONT, "zcat $filename|$what"; while () {last if /^FILE/mo;} while () { my $data = ""; my %data = (); chomp; print "Doing line $.\n" if $. % 10000 == 0; /^(.+?)\s+(\S+)$/o; my ($file, $value) = ($1, $2); $value =~ s#[^,/]+/##og; my @packages = split /,/, $value; for (@packages) { $packages_contents_nr{$_}++; my $lw = $packages_contents_lastword{$_} || "\0"; my $i=0; while (substr($file,$i,1) eq substr($lw,$i++,1)) {} $i--; $i = 255 if $i > 255; $packages_contents{$_} .= pack "CC/a*", ($i, substr($file, $i)); $packages_contents_lastword{$_} = "$file\0"; } # Searches are case-insensitive $file =~ tr [A-Z] [a-z]; my $filename = $file; $filename =~ s,.*/,,; $filenames{$filename} = 1; $contents_packages_reverse{reverse $file} = join "\0", @packages; } my %contents_packages_reverse_db; tie %contents_packages_reverse_db, "DB_File", "$DBDIR/contents_packages_reverse_${suite}_${arch}.db.new", O_RDWR|O_CREAT, 0666, $DB_BTREE or die "Error creating DB: $!"; while (my ($x, $y) = each(%contents_packages_reverse)) { $contents_packages_reverse_db{$x} = $y; } untie %contents_packages_reverse_db; my %packages_contents_db; tie %packages_contents_db, "DB_File", "$DBDIR/packages_contents_${suite}_${arch}.db.new", O_RDWR|O_CREAT, 0666, $DB_BTREE or die "Error creating DB: $!"; while (my ($k, $v) = each(%packages_contents)) { $packages_contents_db{$k} = (pack "L", $packages_contents_nr{$k}) . $v; } untie %packages_contents_db; }}} print "Writing databases...\n"; # FIXME: missing filenames due to optimising above. Need to store filenames # per-suite/arch, but merge them in the end for better cached searching open FILENAMES, "> $DBDIR/filenames.txt.new"; for (keys %filenames) { print FILENAMES "$_\n"; } close FILENAMES; rename("$DBDIR/filenames.txt.new", "$DBDIR/filenames.txt"); for my $archive (@archives) { for my $suite (@suites) { for my $arch (@archs) { rename("$DBDIR/packages_contents_${suite}_${arch}.db.new", "$DBDIR/packages_contents_${suite}_${arch}.db"); rename("$DBDIR/contents_packages_reverse_${suite}_${arch}.db.new", "$DBDIR/contents_packages_reverse_${suite}_${arch}.db"); }}}