X-Git-Url: https://git.deb.at/?p=deb%2Fpackages.git;a=blobdiff_plain;f=bin%2Fparse-packages;h=6f89070915800afb508d1d56c70bab4e18f721f9;hp=b0883a279da39798715b9b9a0a67eb73527ecd45;hb=0f318fa9ad9d473b543a48f46a7714a11283d300;hpb=07fdff9c69f8bd3b4d357fd61042f588701dd1c6 diff --git a/bin/parse-packages b/bin/parse-packages index b0883a2..6f89070 100755 --- a/bin/parse-packages +++ b/bin/parse-packages @@ -2,9 +2,9 @@ # Convert Packages.gz files into Sleepycat db files for efficient usage of # data # -# $Id$ -# # Copyright (C) 2006 Jeroen van Wolffelaar +# Copyright (C) 2006-2007 Frank Lichtenheld +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or @@ -17,7 +17,7 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. use strict; use warnings; @@ -87,9 +87,9 @@ for my $suite (@SUITES) { } # Skip double package next if exists($packages_all_db{"$data{'package'} $data{'architecture'} $data{'version'}"}); - # Skip arch:all for amd64 & gnuab, any non-redundancy is + # Skip arch:all for amd64 & debports, any non-redundancy is # usually a bug anyway - next if ($archive eq 'amd64' or $archive eq 'gnuab') + next if ($archive eq 'amd64' or $archive eq 'debports') and $data{architecture} eq 'all'; if ($data{'provides'}) { @@ -115,10 +115,12 @@ for my $suite (@SUITES) { foreach (@complete_tags) { my ($facet, $tag) = split( /::/, $_, 2); if ($tag =~ s/^\{(.+)\}$/$1/) { + warn "pkg=$data{package} tags=$data{tag}\n" unless $facet && $tag; foreach (split( /,/, $tag )) { push @tags, "${facet}::$_"; } } else { + warn "pkg=$data{package} tags=$data{tag}\n" unless $facet && $tag; push @tags, "${facet}::$tag"; } } @@ -149,8 +151,6 @@ for my $suite (@SUITES) { my $subsection = $data{section} || '-'; if ($data{section} && ($data{section} =~ m=/=o)) { ($section, $subsection) = split m=/=o, $data{section}, 2; - ($subsection, $section) = split m=/=o, $data{section}, 2 - if $section eq 'non-US'; } $data{'section'} = $section; $data{'subsection'} = $subsection; @@ -159,7 +159,7 @@ for my $suite (@SUITES) { $subsections{$suite}{$subsection}++; $priorities{$suite}{$data{priority}}++; my $pkgitem = "$archive $suite $data{'architecture'} ". - "$section $subsection $data{'priority'} $data{'version'} $sdescr\0"; + "$section $subsection $data{'priority'} $data{'version'} $data{'description-md5'} $sdescr\0"; my $previtem = ($packages_small{$data{'package'}}{$suite}{$data{'architecture'}} ||= $pkgitem); $packages_small{$data{'package'}}{$suite}{$data{'architecture'}} = $pkgitem @@ -269,6 +269,7 @@ print "Index $#descriptions descriptions\n"; for (my $i=1; $i<= $#descriptions; $i++) { # strip away additional data my ($only_desc, $pkg, $tags) = split /\000/o, $descriptions[$i], 3; + my $orig_desc = $only_desc; # WARNING: This needs to correspond with what happens in # Packages/Search.pm:do_xapian_search $only_desc =~ s#[^\w/+]+# #og; @@ -304,7 +305,7 @@ for (my $i=1; $i<= $#descriptions; $i++) { } } if ($tags) { - foreach my $t (split /, /, $tags) { + foreach my $t (split m/, /, $tags) { if ($doc->add_term($t)) { warn "can't add term $t: $!\n"; } @@ -316,7 +317,7 @@ for (my $i=1; $i<= $#descriptions; $i++) { }; die $@ if $@; - $descriptions_db{$i} = $only_desc; + $descriptions_db{$i} = $orig_desc; } untie %descriptions_db; $xapian_db->flush; @@ -337,11 +338,10 @@ tie %package_postfixes_db, "DB_File", "$DBDIR/package_postfixes.db.new", or die "Error creating DB: $!"; while (my ($k, $v) = each(%package_postfixes)) { $v =~ s/.$//s; - my $nr = $v; - $nr =~ s/[^\000]//g; - $nr = length($nr) + 1; # < number of hits + my $nr = ($v =~ tr/\000/\000/) + 1; if ($nr > $MAX_PACKAGE_POSTFIXES) { - $v = "\001" . $nr; + $v = ($v =~ /\^/) ? "^\001" . $nr + : "\001" . $nr; } $package_postfixes_db{$k} = $v; }