From 564f2f93ec21c9210415eb6cfb7eea589b05a5ae Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Fri, 24 Feb 2006 03:21:30 +0000 Subject: [PATCH] Support multiple archives --- bin/parse-contents | 205 ++++++++++++++++++++++++--------------------- 1 file changed, 110 insertions(+), 95 deletions(-) diff --git a/bin/parse-contents b/bin/parse-contents index 12b7ed8..767aad4 100755 --- a/bin/parse-contents +++ b/bin/parse-contents @@ -41,48 +41,60 @@ use File::Path; use Packages::Config qw( $TOPDIR $DBDIR @ARCHIVES @SUITES @ARCHITECTURES ); &Packages::Config::init( './' ); -my @archives =( 'us'); #@ARCHIVES # NOT-IMPLEMENTED-YET +my @archives = @ARCHIVES; my @suites = @SUITES; my @archs = @ARCHITECTURES; $DBDIR .= "/contents"; -d $DBDIR || mkpath( $DBDIR ); -for my $archive (@archives) { for my $suite (@suites) { +for my $suite (@suites) { + for my $arch (@archs) { - for my $arch (@archs) { - - my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz"; my $filelist_db = "$DBDIR/filelists_${suite}_${arch}.db"; - next unless -f $filename; - my $ftime = (stat $filename)[10]; # Note: ctime, because mtime is set back via rsync my $dbtime = (stat $filelist_db)[9]; - next if defined $dbtime and $dbtime > $ftime; - print "Reading $archive/$suite/$arch...\n"; - my %packages_contents = (); my %packages_contents_nr = (); my %packages_contents_lastword = (); - + my $extra = ""; $extra = "|sort" if $SORT_REVERSE_CONCURRENTLY; - + open REVERSED, "$extra>$DBDIR/reverse.tmp" - or die "Failed to open output reverse file: $!"; - - open CONT, "zcat $filename|$what" - or die $!; - while () {last if /^FILE/mo;} - while () { - my $data = ""; - my %data = (); - chomp; - print "Doing line ".($./1000)."k (out of approx 1.5M)\n" if $. % 250000 == 0; - /^(.+?)\s+(\S+)$/o; - my ($file, $value) = ($1, $2); - $value =~ s#[^,/]+/##og; - my @packages = split /,/, $value; - for (@packages) { + or die "Failed to open output reverse file: $!"; + + my $changed = 0; + for my $archive (@archives) { + + my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz"; + next unless -f $filename; + # Note: ctime, because mtime is set back via rsync + my $ftime = (stat $filename)[10]; + next if defined $dbtime and $dbtime > $ftime; + print "$archive/$suite/$arch needs update\n"; + $changed++; + } + if ($changed) { + for my $archive (@archives) { + + my $filename = "$TOPDIR/archive/$archive/$suite/Contents-$arch.gz"; + next unless -f $filename; + print "Reading $archive/$suite/$arch...\n"; + + open CONT, "zcat $filename|$what" + or die $!; + while () {last if /^FILE/mo;} + open CONT, "zcat $filename|$what" if eof(CONT); + while () { + my $data = ""; + my %data = (); + chomp; + print "Doing line ".($./1000)."k (out of approx 1.5M)\n" if $. % 250000 == 0; + /^(.+?)\s+(\S+)$/o; + my ($file, $value) = ($1, $2); + $value =~ s#[^,/]+/##og; + my @packages = split /,/, $value; + for (@packages) { $packages_contents_nr{$_}++; my $lw = $packages_contents_lastword{$_} || "\0"; my $i=0; @@ -91,89 +103,92 @@ for my $archive (@archives) { for my $suite (@suites) { $i = 255 if $i > 255; $packages_contents{$_} .= pack "CC/a*", ($i, substr($file, $i)); $packages_contents_lastword{$_} = "$file\0"; + } + # Searches are case-insensitive + $file =~ tr [A-Z] [a-z]; + + print REVERSED (reverse $file)."\0".(join ":$arch\0", @packages).":$arch\n"; } - # Searches are case-insensitive - $file =~ tr [A-Z] [a-z]; - - print REVERSED (reverse $file)."\0".(join ":$arch\0", @packages).":$arch\n"; - } - close CONT; - close REVERSED; - - print "Sorting reverse list if needed\n"; - system("cd $DBDIR && sort reverse.tmp > reverse.sorted && mv reverse.{sorted,tmp}") == 0 + close CONT; + + } + close REVERSED; + + print "Sorting reverse list if needed\n"; + system("cd $DBDIR && sort reverse.tmp > reverse.sorted && mv reverse.{sorted,tmp}") == 0 or die "Failed to sort reverse" unless $SORT_REVERSE_CONCURRENTLY; - - print "Writing filelist db\n"; - tie my %packages_contents_db, "DB_File", "$filelist_db.new", - O_RDWR|O_CREAT, 0666, $DB_BTREE + + print "Writing filelist db\n"; + tie my %packages_contents_db, "DB_File", "$filelist_db.new", + O_RDWR|O_CREAT, 0666, $DB_BTREE or die "Error creating DB: $!"; - while (my ($k, $v) = each(%packages_contents)) { + while (my ($k, $v) = each(%packages_contents)) { $packages_contents_db{$k} = (pack "L", $packages_contents_nr{$k}) - . $v; - } - untie %packages_contents_db; - - rename("$DBDIR/reverse.tmp", "$DBDIR/reverse_${suite}_${arch}.txt"); - - rename("$filelist_db.new", $filelist_db); - system("ln -sf $filelist_db $DBDIR/filelists_${suite}_all.db") == 0 + . $v; + } + untie %packages_contents_db; + + rename("$DBDIR/reverse.tmp", "$DBDIR/reverse_${suite}_${arch}.txt"); + + rename("$filelist_db.new", $filelist_db); + system("ln -sf $filelist_db $DBDIR/filelists_${suite}_all.db") == 0 or die "Oops"; - } - - - my $go = 0; - my $suite_mtime = (stat "$DBDIR/reverse_$suite.db")[9]; - for my $file (glob "$DBDIR/reverse_${suite}_*.txt") { - $go = 1 if not defined $suite_mtime - or $suite_mtime < (stat $file)[9]; - } - next unless $go; - - print "Merging reverse path lists for ${suite}...\n"; - - open MERGED, "sort -m $DBDIR/reverse_${suite}_*.txt |" + } + } + + my $go = 0; + my $suite_mtime = (stat "$DBDIR/reverse_$suite.db")[9]; + for my $file (glob "$DBDIR/reverse_${suite}_*.txt") { + $go = 1 if not defined $suite_mtime + or $suite_mtime < (stat $file)[9]; + } + next unless $go; + + print "Merging reverse path lists for ${suite}...\n"; + + open MERGED, "sort -m $DBDIR/reverse_${suite}_*.txt |" or die "Failed to open merged list"; - open FILENAMES, "> $DBDIR/filenames_$suite.txt.new" + open FILENAMES, "> $DBDIR/filenames_$suite.txt.new" or die "Failed to open filenames list"; - tie my %reverse_path_db, "DB_File", "$DBDIR/reverse_${suite}.db.new", - O_RDWR|O_CREAT, 0666, $DB_BTREE - or die "Error creating DB: $!"; - - my $lastpath = ""; - my $lastfile = ""; - my @matches = (); - while () { - print "Doing line ".($./1000000)."M (out of approx. 16M)\n" if $. % 1000000 == 0; + tie my %reverse_path_db, "DB_File", "$DBDIR/reverse_${suite}.db.new", + O_RDWR|O_CREAT, 0666, $DB_BTREE + or die "Error creating DB: $!"; + + my $lastpath = ""; + my $lastfile = ""; + my @matches = (); + while () { + print "Doing line ".($./1000000)."M (out of approx. 16M)\n" + if $. % 1000000 == 0; chomp; my @line = split /\0/o, $_; my $revpath = shift @line; if ($revpath ne $lastpath) { - # Wrap: Do useful stuff with this ($lastpath, @matches) - $reverse_path_db{$lastpath} = join "\0", @matches if $lastpath ne ""; - $lastpath =~ s,/.*,,o; - if ($lastfile ne $lastpath) { + # Wrap: Do useful stuff with this ($lastpath, @matches) + $reverse_path_db{$lastpath} = join "\0", @matches if $lastpath ne ""; + $lastpath =~ s,/.*,,o; + if ($lastfile ne $lastpath) { $lastfile = $lastpath; print FILENAMES (reverse $lastfile)."\n"; - } - # - $lastpath = $revpath; - @matches = @line; - next; + } + # + $lastpath = $revpath; + @matches = @line; + next; } push @matches, @line - } - # Note: do useful stuff here too, for out last entry. Maybe prevent this by - # adding a fake ultimate entry? - $reverse_path_db{$lastpath} = join "\0", @matches; - - untie %reverse_path_db; - close FILENAMES; - close MERGED; - - rename "$DBDIR/filenames_$suite.txt.new", "$DBDIR/filenames_$suite.txt"; - rename "$DBDIR/reverse_$suite.db.new", "$DBDIR/reverse_$suite.db"; -}} + } + # Note: do useful stuff here too, for out last entry. Maybe prevent this by + # adding a fake ultimate entry? + $reverse_path_db{$lastpath} = join "\0", @matches; + + untie %reverse_path_db; + close FILENAMES; + close MERGED; + + rename "$DBDIR/filenames_$suite.txt.new", "$DBDIR/filenames_$suite.txt"; + rename "$DBDIR/reverse_$suite.db.new", "$DBDIR/reverse_$suite.db"; +} # vim: set ts=4 -- 2.39.2