X-Git-Url: https://git.deb.at/?a=blobdiff_plain;ds=sidebyside;f=cgi-bin%2Fsearch_packages.pl;h=f6df10c95635a882bc87a825eaf698266ae0777c;hb=7837241d743061df9f58680a9b786e48d3365a48;hp=bc51c30f66c3ce0fbc0e479dca8ea1f5f28cbba7;hpb=e41d34b008ea4b4ad09b5eea457d7523c7ef2fa3;p=deb%2Fpackages.git diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl index bc51c30..f6df10c 100755 --- a/cgi-bin/search_packages.pl +++ b/cgi-bin/search_packages.pl @@ -6,15 +6,14 @@ # Copyright (C) 2000, 2001 Josip Rodin # Copyright (C) 2001 Adam Heath # Copyright (C) 2004 Martin Schulze -# Copyright (C) 2004 Frank Lichtenheld +# Copyright (C) 2004-2006 Frank Lichtenheld # # use is allowed under the terms of the GNU Public License (GPL) # see http://www.fsf.org/copyleft/gpl.html for a copy of the license -require 5.001; use strict; use CGI qw( -oldstyle_urls ); -#use CGI::Carp qw( fatalsToBrowser ); +use CGI::Carp qw( fatalsToBrowser ); use POSIX; use URI::Escape; use HTML::Entities; @@ -28,7 +27,6 @@ use Packages::Search qw( :all ); use Packages::HTML (); my $thisscript = "search_packages.pl"; -my $use_grep = 1; my $HOME = "http://www.debian.org"; my $ROOT = ""; my $SEARCHPAGE = "http://packages.debian.org/"; @@ -38,6 +36,10 @@ my @SECTIONS = qw( main contrib non-free ); my @ARCHIVES = qw( us security installer ); my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64 kfreebsd-i386 mips mipsel powerpc s390 sparc ); +my %SUITES = map { $_ => 1 } @SUITES; +my %SECTIONS = map { $_ => 1 } @SECTIONS; +my %ARCHIVES = map { $_ => 1 } @ARCHIVES; +my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES; $ENV{PATH} = "/bin:/usr/bin"; @@ -46,7 +48,7 @@ my $input = new CGI; my $pet0 = new Benchmark; # use this to disable debugging in production mode completly -my $debug_allowed = 0; +my $debug_allowed = 1; my $debug = $debug_allowed && $input->param("debug"); $Search::Param::debug = 1 if $debug > 1; @@ -55,6 +57,22 @@ print $input->header if $debug; # print $input->dump; # exit; +if (my $path = $input->param('path')) { + my @components = map { lc $_ } split /\//, $path; + + foreach (@components) { + if ($SUITES{$_}) { + $input->param('suite', $_); + } elsif ($SECTIONS{$_}) { + $input->param('section', $_); + } elsif ($ARCHIVES{$_}) { + $input->param('archive', $_); + }elsif ($ARCHITECTURES{$_}) { + $input->param('arch', $_); + } + } +} + my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' }, suite => { default => 'stable', match => '^(\w+)$', alias => 'version', array => ',', @@ -145,7 +163,8 @@ print Packages::HTML::header( title => 'Package Search Results' , # read the configuration my $topdir; if (!open (C, "../config.sh")) { - print "\nInternal Error: Cannot open configuration file.\n\n" if $format eq 'html'; + print "\nInternal Error: Cannot open configuration file.\n\n" +if $format eq 'html'; exit 0; } while () { @@ -153,172 +172,157 @@ while () { } close (C); -my $FLATDIR = $topdir . "/files/flat"; +my $DBDIR = $topdir . "/files/db"; my $search_on_sources = 0; -my %descr; -my %sections; - -sub find_desc -{ - my $pkg = shift; - my $suite = shift; - my $part = shift; - my $descr = ''; +my $st0 = new Benchmark; +my @results; +my $too_much_hits; +if ($searchon eq 'sourcenames') { + $search_on_sources = 1; +} - unless (exists $descr{$suite}{$part}) { - $descr{$suite}{$part} = {}; - tie %{$descr{$suite}{$part}}, 'DB_File', "$FLATDIR/$suite/$part/Description", O_RDONLY - or return "Error while loading descriptions database: $!"; +my %suites = map { $_ => 1 } @suites; +my %sections = map { $_ => 1 } @sections; +my %archs = map { $_ => 1 } @archs; + +print "DEBUG: suites=@suites, sections=@sections, archs=@archs
" if $debug > 2; + +sub read_entry { + my ($hash, $key, $results) = @_; + my $result = $hash->{$key}; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 7 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') + && $sections{$data[2]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } } - - return $descr{$suite}{$part}{$pkg}; } - -sub find_section -{ - my $pkg = shift; - my $suite = shift; - my $part = shift; - my $section = ''; - - unless (exists $sections{$suite}{$part}) { - $sections{$suite}{$part} = {}; - tie %{$sections{$suite}{$part}}, 'DB_File', "$FLATDIR/$suite/$part/Section", O_RDONLY - or return undef; +sub read_src_entry { + my ($hash, $key, $results) = @_; + my $result = $hash->{$key}; + + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 5 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($suites{$data[0]} && $sections{$data[1]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } } - - return $sections{$suite}{$part}{$pkg}; } -my $st0 = new Benchmark; -tie my %cache, 'DB_File', "$topdir/files/search.cache/search.cache", O_RDWR|O_CREAT or $use_cache = 0; -my $cached; -my @results; -my $cache_key = $keyword.$exact.$subword.$searchon.$suites_param.$sections_param.$archs_param; -if ($searchon eq 'sourcenames') { - $search_on_sources = 1; -} -if ($use_cache && ($cached = $cache{$cache_key})) { - @results = split /\n/, $cached; - print "DEBUG: Used cached results
$cached
" if $debug; -} else { - my $searchkeyword = $keyword; - my $grep_searchkeyword = $keyword; - $searchkeyword =~ s/[.]/\\./; - if (($searchon eq 'names') || ($searchon eq 'sourcenames')) { - # asserting that all package names are lower case - $searchkeyword = lc($searchkeyword) unless $case_bool; - $case_bool = 1; - $grep_searchkeyword = "^[^ ]*$searchkeyword" unless $exact; - $searchkeyword = "^\\S*$searchkeyword" unless $exact; + +if ($searchon eq 'names') { + + $keyword = lc $keyword unless $case_bool; + + my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + + if ($exact) { + read_entry( \%packages, $keyword, \@results ); } else { - $grep_searchkeyword = "\\(^$searchkeyword\\b\\|\\b$searchkeyword\\b\\)" - if $subword != 1; - $searchkeyword = "\\b$searchkeyword\\b" - if $subword != 1; + my ($key, $prefixes) = ($keyword, ''); + my %pkgs; + my $p_obj = tie my %pref, 'DB_File', "$DBDIR/package_postfixes.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie postfix db $DBDIR/package_postfixes.db: $!"; + $p_obj->seq( $key, $prefixes, R_CURSOR ); + do { + if ($prefixes =~ /^\001(\d+)/o) { + $too_much_hits += $1; + } else { + print "DEBUG: add word $key
" if $debug > 2; + $pkgs{$key}++; + foreach (split /\000/o, $prefixes) { + print "DEBUG: add word $_$key
" if $debug > 2; + $pkgs{$_.$key}++; + } + } + } while (($p_obj->seq( $key, $prefixes, R_NEXT ) == 0) + && (index($key, $keyword) >= 0) + && !$too_much_hits + && (keys %pkgs < 100)); + + my $no_results = keys %pkgs; + if ($too_much_hits || ($no_results >= 100)) { + $too_much_hits += $no_results; + %pkgs = ( $keyword => 1 ); + } + foreach my $pkg (sort keys %pkgs) { + read_entry( \%packages, $pkg, \@results ); + } } +} elsif ($searchon eq 'sourcenames') { + + $keyword = lc $keyword unless $case_bool; -# FIXME -# check if the Packages files are there -#my @files = glob ("$fdir/$file"); -#if ($#files == -1) { -# XXX has to be updated for new architectures -# if ($format eq 'html') { -# if (($version eq "stable" and $arch =~ /^(hurd|sh)$/) -# || ($version eq "oldstable" and $arch =~ /^amd64$/)) { -# print "Error: the $arch architecture didn't exist in $version.
\n" -# ."Please go back and choose a different distribution.\n"; -# } else { -# print "Error: Packages/Sources file not found.
\n" -# ."If the problem persists, please inform $ENV{SERVER_ADMIN}.\n"; -# printf "

$file

"; -# } -# &printfooter; -# } -# exit; -#} - - my @files; - foreach my $s (@suites) { - foreach my $sec (@sections) { - foreach my $a (@archs) { - foreach my $archive (@ARCHIVES) { - if (($searchon eq 'names' or $searchon eq 'sourcenames') - and $exact) { - my ( %packages, $file ); - if ($search_on_sources) { - $file = "$FLATDIR/$s/$sec/Sources.$archive.db"; - } else { - $file = "$FLATDIR/$s/$sec/Packages-$a.$archive.db"; - } - if (-f $file) { - print "DEBUG: Use file $file
" - if $debug > 1; - - tie %packages, 'DB_File', $file, O_RDONLY - or die "Couldn't open packages file $file: $!"; - - if (my $data = $packages{$searchkeyword}) { - print "DEBUG: Found result $data
" - if $debug > 1; - push @results, "$file:$data"; - } - } - } else { - my $file; - if ($search_on_sources) { - $file = "$FLATDIR/$s/$sec/Sources.$archive"; - } else { - $file = "$FLATDIR/$s/$sec/Packages-$a.$archive"; - } - if (-f $file) { - print "DEBUG: Use file $file
" - if $debug > 1; - - # use_grep is currently way faster, though - # I can't pinpoint exactly why, yet - # most probably the perl regexes are - # slow compared to the simpler grep - # regexes - unless ($use_grep) { - open my $pkg_fh, '<', $file - or die "Couldn't open packages file $file: $!"; - - foreach (<$pkg_fh>) { - if (/$searchkeyword/o) { - print "DEBUG: Found result $_
" - if $debug > 1; - - push @results, "$file:$_"; - } - } - } else { - push @files, $file; - } - } - } + my $obj = tie my %packages, 'DB_File', "$DBDIR/sources_small.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/sources_small.db: $!"; + + if ($exact) { + read_src_entry( \%packages, $keyword, \@results ); + } else { + while (my ($pkg, $result) = each %packages) { + #what's faster? I can't really see a difference + (index($pkg, $keyword) >= 0) or next; + #$pkg =~ /\Q$keyword\E/ or next; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 5 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($suites{$data[0]} && $sections{$data[1]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @results, [ $pkg , @data ]; } } } } +} else { - if ($use_grep) { - if (@files) { - my @grep = ( 'grep', '-H' ); - push @grep, '-i' unless $case_bool; - push @grep, $grep_searchkeyword; - push @grep, @files; - - print "DEBUG: starting grep command '". - substr("@grep",0,100)."[...]'
" if $debug; - open my $grep_out, '-|', @grep or - die "grep failed: $!"; - @results = <$grep_out>; + my @lines; + my $regex; + if ($case_bool) { + if ($exact) { + $regex = qr/\b\Q$keyword\E\b/o; + } else { + $regex = qr/\Q$keyword\E/o; + } + } else { + if ($exact) { + $regex = qr/\b\Q$keyword\E\b/io; + } else { + $regex = qr/\Q$keyword\E/io; } } - - $cache{$cache_key} = join "", @results; + + open DESC, '<', "$DBDIR/descriptions.txt" or die "couldn't open $DBDIR/descriptions.txt: $!"; + while () { + $_ =~ $regex or next; + print "DEBUG: Matched line $.
" if $debug > 2; + push @lines, $.; + } + close DESC; + + my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + my $obj = tie my %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!"; + + my %tmp_results; + foreach my $l (@lines) { + my $result = $did2pkg{$l}; + foreach (split /\000/o, $result) { + my @data = split /\s/, $_, 3; + next unless $archs{$data[2]}; + $tmp_results{$data[0]}++; + } + } + foreach my $pkg (keys %tmp_results) { + read_entry( \%packages, $pkg, \@results ); + } } my $st1 = new Benchmark; @@ -342,6 +346,10 @@ if ($format eq 'html') { } } +if ($too_much_hits) { +print "

Your search was too wide so we will only display exact matches. At least $too_much_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.

"; +} + if (!@results) { if ($format eq 'html') { my $keyword_esc = uri_escape( $keyword ); @@ -383,35 +391,31 @@ if (!@results) { } my (%pkgs, %sect, %part, %desc, %binaries); -my (@colon, $package, $pkg_t, $section, $ver, $arch, $foo, $binaries); unless ($search_on_sources) { - foreach my $line (@results) { - @colon = split (/:/, $line); - ($pkg_t, $section, $ver, $arch, $foo) = split (/ /, $#colon >1 ? $colon[1].":".$colon[2]:$colon[1], 5); - $section =~ s,^(non-free|contrib)/,,; - $section =~ s,^non-US.*$,non-US,,; - my ($dist,$part,undef) = $colon[0] =~ m,.*/([^/]+)/([^/]+)/Packages-([^\.]+)\.,; #$1=stable, $2=main, $3=alpha - - ($package) = $pkg_t =~ m/^(.+)/; # untaint - $pkgs{$package}{$dist}{$ver}{$arch} = 1; - $sect{$package}{$dist}{$ver} = $section; - $part{$package}{$dist}{$ver} = $part unless $part eq 'main'; - - $desc{$package}{$dist}{$ver} = find_desc ($package, $dist, $part) if (! exists $desc{$package}{$dist}{$ver}); + foreach (@results) { + my ($pkg_t, $suite, $arch, $section, $subsection, + $priority, $version, $desc) = @$_; + + my ($package) = $pkg_t =~ m/^(.+)/; # untaint + $pkgs{$package}{$suite}{$version}{$arch} = 1; + $sect{$package}{$suite}{$version} = $subsection; + $part{$package}{$suite}{$version} = $section unless $section eq 'main'; + + $desc{$package}{$suite}{$version} = $desc; } if ($format eq 'html') { my ($start, $end) = multipageheader( scalar keys %pkgs ); my $count = 0; - + foreach my $pkg (sort keys %pkgs) { $count++; next if $count < $start or $count > $end; printf "

Package %s

\n", $pkg; print "
    \n"; - foreach $ver (@SUITES) { + foreach my $ver (@SUITES) { if (exists $pkgs{$pkg}{$ver}) { my @versions = version_sort keys %{$pkgs{$pkg}{$ver}}; my $part_str = ""; @@ -436,7 +440,7 @@ unless ($search_on_sources) { $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' ); my @triples; foreach my $pkg (sort keys %pkgs) { - foreach $ver (@DISTS) { + foreach my $ver (@DISTS) { if (exists $pkgs{$pkg}{$ver}) { my @versions = version_sort keys %{$pkgs{$pkg}{$ver}}; foreach my $version (@versions) { @@ -458,21 +462,15 @@ unless ($search_on_sources) { print $rdf->serialise(@triples); } } else { - foreach my $line (@results) { - chomp($line); - @colon = split (/:/, $line); - ($package, $section, $ver, $binaries) = split (/ /, $#colon >1 ? $colon[1].":".$colon[2]:$colon[1], 4); - $section =~ s,^(non-free|contrib)/,,; - $section =~ s,^non-US.*$,non-US,,; - $colon[0] =~ m,.*/([^/]+)/([^/]+)/Sources\.,; #$1=stable, $2=main + foreach (@results) { + my ($package, $suite, $section, $subsection, $priority, + $version, $binaries) = @$_; - my ($suite, $part) = ($1, $2); - $pkgs{$package}{$suite} = $ver; - $sect{$package}{$suite}{source} = $section; - $part{$package}{$suite}{source} = $part unless $part eq 'main'; + $pkgs{$package}{$suite} = $version; + $sect{$package}{$suite}{source} = $subsection; + $part{$package}{$suite}{source} = $section unless $section eq 'main'; $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ]; - } if ($format eq 'html') { @@ -484,7 +482,7 @@ unless ($search_on_sources) { next if ($count < $start) or ($count > $end); printf "

    Source package %s

    \n", $pkg; print "
      \n"; - foreach $ver (@DISTS) { + foreach my $ver (@SUITES) { if (exists $pkgs{$pkg}{$ver}) { my $part_str = ""; if ($part{$pkg}{$ver}{source}) { @@ -495,12 +493,12 @@ unless ($search_on_sources) { print "
      Binary packages: "; my @bp_links; foreach my $bp (@{$binaries{$pkg}{$ver}}) { - my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || ''; - $sect =~ s,^(non-free|contrib)/,,; - $sect =~ s,^non-US.*$,non-US,,; + my $sect = 'section'; + my $bp_link; if ($sect) { - $bp_link = sprintf "%s", $ver, $sect, uri_escape( $bp ), $bp; + $bp_link = sprintf( "%s", + $ver, $sect, uri_escape( $bp ), $bp ); } else { $bp_link = $bp; } @@ -518,7 +516,7 @@ unless ($search_on_sources) { $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' ); my @triples; foreach my $pkg (sort keys %pkgs) { - foreach $ver (@DISTS) { + foreach my $ver (@SUITES) { if (exists $pkgs{$pkg}{$ver}) { my $id = "$ROOT/$ver/source/$pkg"; @@ -551,7 +549,9 @@ sub printindexline { my $index_line; if ($no_results > $results_per_page) { - $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results); + $index_line = prevlink($input,\%params)." | ". + indexline( $input, \%params, $no_results)." | ". + nextlink($input,\%params, $no_results); print "

      $index_line

      "; }