X-Git-Url: https://git.deb.at/?a=blobdiff_plain;f=cgi-bin%2Fsearch_packages.pl;h=88f9877a01d1ecc66748b1e938d0806ad9dcfb7a;hb=de413c76fc1abbada02c3fb8174d7ac3e4cd5a0b;hp=4e2225ca25114b8f650ca68e666143078b79b84b;hpb=7ca948e9df320aa1785beda7bfcf927cf676af43;p=deb%2Fpackages.git diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl index 4e2225c..88f9877 100755 --- a/cgi-bin/search_packages.pl +++ b/cgi-bin/search_packages.pl @@ -26,7 +26,7 @@ use Deb::Versions; use Packages::Search qw( :all ); use Packages::HTML (); -my $thisscript = "search_packages.pl"; +my $thisscript = $Packages::HTML::SEARCH_CGI; my $HOME = "http://www.debian.org"; my $ROOT = ""; my $SEARCHPAGE = "http://packages.debian.org/"; @@ -44,12 +44,18 @@ my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES; $ENV{PATH} = "/bin:/usr/bin"; # Read in all the variables set by the form -my $input = new CGI; +my $input; +if ($ARGV[0] eq 'php') { + $input = new CGI(\*STDIN); +} else { + $input = new CGI; +} my $pet0 = new Benchmark; # use this to disable debugging in production mode completly my $debug_allowed = 1; my $debug = $debug_allowed && $input->param("debug"); +$debug = 0 if not defined($debug); $Search::Param::debug = 1 if $debug > 1; # If you want, just print out a list of all of the variables and exit. @@ -177,6 +183,7 @@ my $search_on_sources = 0; my $st0 = new Benchmark; my @results; +my $too_much_hits; if ($searchon eq 'sourcenames') { $search_on_sources = 1; } @@ -187,72 +194,123 @@ my %archs = map { $_ => 1 } @archs; print "DEBUG: suites=@suites, sections=@sections, archs=@archs
" if $debug > 2; -if ($searchon eq 'names') { +sub read_entry { + my ($hash, $key, $results) = @_; + my $result = $hash->{$key}; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 7 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') + && $sections{$data[2]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } + } +} +sub read_src_entry { + my ($hash, $key, $results) = @_; + my $result = $hash->{$key}; + + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 5 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($suites{$data[0]} && $sections{$data[1]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } + } +} +sub do_names_search { + my ($keyword, $file, $postfix_file, $read_entry) = @_; $keyword = lc $keyword unless $case_bool; - my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$file: $!"; if ($exact) { - my $result = $packages{$keyword}; - foreach (split /\000/, $result) { - my @data = split ( /\s/, $_, 7 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') - && $sections{$data[2]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; - push @results, [ $keyword, @data ]; - } - } + &$read_entry( \%packages, $keyword, \@results ); } else { - while (my ($pkg, $result) = each %packages) { - #what's faster? I can't really see a difference - (index($pkg, $keyword) >= 0) or next; - #$pkg =~ /\Q$keyword\E/ or next; - foreach (split /\000/, $packages{$pkg}) { - my @data = split ( /\s/, $_, 7 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') - && $sections{$data[2]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; - push @results, [ $pkg , @data ]; + my ($key, $prefixes) = ($keyword, ''); + my %pkgs; + my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie postfix db $DBDIR/$postfix_file: $!"; + $p_obj->seq( $key, $prefixes, R_CURSOR ); + while (index($key, $keyword) >= 0) { + if ($prefixes =~ /^\001(\d+)/o) { + $too_much_hits += $1; + } else { + foreach (split /\000/o, $prefixes) { + $_ = '' if $_ eq '^'; + print "DEBUG: add word $_$key
" if $debug > 2; + $pkgs{$_.$key}++; } } + last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0; + last if $too_much_hits; + last if keys %pkgs < 100; + } + + my $no_results = keys %pkgs; + if ($too_much_hits || ($no_results >= 100)) { + $too_much_hits += $no_results; + %pkgs = ( $keyword => 1 ); + } + foreach my $pkg (sort keys %pkgs) { + &$read_entry( \%packages, $pkg, \@results ); } } +} + +if ($searchon eq 'names') { + do_names_search( $keyword, 'packages_small.db', + 'package_postfixes.db', \&read_entry ); } elsif ($searchon eq 'sourcenames') { - - $keyword = lc $keyword unless $case_bool; - - my $obj = tie my %packages, 'DB_File', "$DBDIR/sources_small.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/sources_small.db: $!"; - - if ($exact) { - my $result = $packages{$keyword}; - foreach (split /\000/, $result) { - my @data = split ( /\s/, $_, 5 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && $sections{$data[1]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; - push @results, [ $keyword, @data ]; - } + do_names_search( $keyword, 'sources_small.db', + 'source_postfixes.db', \&read_src_entry ); +} else { + + my @lines; + my $regex; + if ($case_bool) { + if ($exact) { + $regex = qr/\b\Q$keyword\E\b/o; + } else { + $regex = qr/\Q$keyword\E/o; } } else { - while (my ($pkg, $result) = each %packages) { - #what's faster? I can't really see a difference - (index($pkg, $keyword) >= 0) or next; - #$pkg =~ /\Q$keyword\E/ or next; - foreach (split /\000/, $packages{$pkg}) { - my @data = split ( /\s/, $_, 5 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && $sections{$data[1]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; - push @results, [ $pkg , @data ]; - } - } + if ($exact) { + $regex = qr/\b\Q$keyword\E\b/io; + } else { + $regex = qr/\Q$keyword\E/io; + } + } + + open DESC, '<', "$DBDIR/descriptions.txt" or die "couldn't open $DBDIR/descriptions.txt: $!"; + while () { + $_ =~ $regex or next; + print "DEBUG: Matched line $.
" if $debug > 2; + push @lines, $.; + } + close DESC; + + my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + my $obj = tie my %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!"; + + my %tmp_results; + foreach my $l (@lines) { + my $result = $did2pkg{$l}; + foreach (split /\000/o, $result) { + my @data = split /\s/, $_, 3; + next unless $archs{$data[2]}; + $tmp_results{$data[0]}++; } } + foreach my $pkg (keys %tmp_results) { + read_entry( \%packages, $pkg, \@results ); + } } my $st1 = new Benchmark; @@ -276,6 +334,10 @@ if ($format eq 'html') { } } +if ($too_much_hits) { +print "

Your search was too wide so we will only display exact matches. At least $too_much_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.

"; +} + if (!@results) { if ($format eq 'html') { my $keyword_esc = uri_escape( $keyword ); @@ -538,3 +600,5 @@ END print $input->end_html; } + +# vim: ts=8 sw=4