X-Git-Url: https://git.deb.at/?a=blobdiff_plain;f=cgi-bin%2Fsearch_packages.pl;h=0723387264e48fabdf3bb59506f924192542c423;hb=1a910dd8949a5a4ce3c93fa581a1f70fe5675997;hp=c3877fc04d6ae600602fc1dc05958b15f032c09d;hpb=11d34e2708df7b0bcdfe7d7bcbccdc5d10326457;p=deb%2Fpackages.git diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl index c3877fc..0723387 100755 --- a/cgi-bin/search_packages.pl +++ b/cgi-bin/search_packages.pl @@ -26,12 +26,11 @@ use Deb::Versions; use Packages::Search qw( :all ); use Packages::HTML (); -my $thisscript = "search_packages.pl"; +my $thisscript = $Packages::HTML::SEARCH_CGI; my $HOME = "http://www.debian.org"; my $ROOT = ""; my $SEARCHPAGE = "http://packages.debian.org/"; my @SUITES = qw( oldstable stable testing unstable experimental ); -my @DISTS = @SUITES; my @SECTIONS = qw( main contrib non-free ); my @ARCHIVES = qw( us security installer ); my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64 @@ -44,13 +43,19 @@ my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES; $ENV{PATH} = "/bin:/usr/bin"; # Read in all the variables set by the form -my $input = new CGI; +my $input; +if ($ARGV[0] && ($ARGV[0] eq 'php')) { + $input = new CGI(\*STDIN); +} else { + $input = new CGI; +} my $pet0 = new Benchmark; # use this to disable debugging in production mode completly my $debug_allowed = 1; my $debug = $debug_allowed && $input->param("debug"); -$Search::Param::debug = 1 if $debug > 1; +$debug = 0 if not defined($debug); +$Packages::Search::debug = 1 if $debug > 1; # If you want, just print out a list of all of the variables and exit. print $input->header if $debug; @@ -73,30 +78,42 @@ if (my $path = $input->param('path')) { } } -my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' }, +my ( $format, $keyword, $case, $subword, $exact, $searchon, + @suites, @sections, @archs ); + +my %params_def = ( keywords => { default => undef, + match => '^\s*([-+\@\w\/.:]+)\s*$', + var => \$keyword }, suite => { default => 'stable', match => '^(\w+)$', alias => 'version', array => ',', + var => \@suites, replace => { all => \@SUITES } }, - case => { default => 'insensitive', match => '^(\w+)$' }, - official => { default => 0, match => '^(\w+)$' }, - use_cache => { default => 1, match => '^(\w+)$' }, - subword => { default => 0, match => '^(\w+)$' }, - exact => { default => undef, match => '^(\w+)$' }, - searchon => { default => 'all', match => '^(\w+)$' }, + case => { default => 'insensitive', match => '^(\w+)$', + var => \$case }, +# official => { default => 0, match => '^(\w+)$' }, +# use_cache => { default => 1, match => '^(\w+)$' }, + subword => { default => 0, match => '^(\w+)$', + var => \$subword }, + exact => { default => undef, match => '^(\w+)$', + var => \$exact }, + searchon => { default => 'all', match => '^(\w+)$', + var => \$searchon }, section => { default => 'all', match => '^([\w-]+)$', alias => 'release', array => ',', + var => \@sections, replace => { all => \@SECTIONS } }, arch => { default => 'any', match => '^(\w+)$', - array => ',', replace => + array => ',', var => \@archs, replace => { any => \@ARCHITECTURES } }, archive => { default => 'all', match => '^(\w+)$', array => ',', replace => { all => \@ARCHIVES } }, - format => { default => 'html', match => '^(\w+)$' }, + format => { default => 'html', match => '^(\w+)$', + var => \$format }, ); -my %params = Packages::Search::parse_params( $input, \%params_def ); +my %opts; +my %params = Packages::Search::parse_params( $input, \%params_def, \%opts ); -my $format = $params{values}{format}{final}; #XXX: Don't use alternative output formats yet $format = 'html'; @@ -111,20 +128,12 @@ if ($params{errors}{keywords}) { print "Error: keyword not valid or missing" if $format eq 'html'; exit 0; } -my $keyword = $params{values}{keywords}{final}; -my @suites = @{$params{values}{suite}{final}}; -my $official = $params{values}{official}{final}; -my $use_cache = $params{values}{use_cache}{final}; -my $case = $params{values}{case}{final}; + my $case_bool = ( $case !~ /insensitive/ ); -my $subword = $params{values}{subword}{final}; -my $exact = $params{values}{exact}{final}; $exact = !$subword unless defined $exact; -my $searchon = $params{values}{searchon}{final}; -my @sections = @{$params{values}{section}{final}}; -my @archs = @{$params{values}{arch}{final}}; -my $page = $params{values}{page}{final}; -my $results_per_page = $params{values}{number}{final}; +$opts{h_suites} = { map { $_ => 1 } @suites }; +$opts{h_sections} = { map { $_ => 1 } @sections }; +$opts{h_archs} = { map { $_ => 1 } @archs }; # for URL construction my $suites_param = join ',', @{$params{values}{suite}{no_replace}}; @@ -177,40 +186,163 @@ my $search_on_sources = 0; my $st0 = new Benchmark; my @results; +my $too_many_hits; if ($searchon eq 'sourcenames') { $search_on_sources = 1; } -my %suites = map { $_ => 1 } @suites; -my %sections = map { $_ => 1 } @sections; -my %archs = map { $_ => 1 } @archs; - -print "DEBUG: suites=@suites, sections=@sections, archs=@archs
" if $debug > 2; - -if ($searchon eq 'names') { +sub read_entry { + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 7 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($opts->{h_suites}{$data[0]} + && ($opts->{h_archs}{$data[1]} || $data[1] eq 'all') + && $opts->{h_sections}{$data[2]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } + } +} +sub read_src_entry { + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 5 ); + print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; + if ($opts->{h_suites}{$data[0]} && $opts->{h_sections}{$data[1]}) { + print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + push @$results, [ $key, @data ]; + } + } +} +sub do_names_search { + my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_; + my @results; - $keyword = lc $keyword unless $case_bool; + $keyword = lc $keyword unless $opts->{case_bool}; - my %packages; - tie %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$file: $!"; - my $result = $packages{$keyword}; - foreach (split /\000/, $result) { - my @data = split ( /\s/, $_, 6 ); - #FIXME, should be done on db generation - if ($data[2] =~ m,/,) { - $data[2] =~ s,/.*$,,; + if ($opts->{exact}) { + &$read_entry( \%packages, $keyword, \@results, $opts ); + } else { + my ($key, $prefixes) = ($keyword, ''); + my %pkgs; + my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie postfix db $DBDIR/$postfix_file: $!"; + $p_obj->seq( $key, $prefixes, R_CURSOR ); + while (index($key, $keyword) >= 0) { + if ($prefixes =~ /^\001(\d+)/o) { + $too_many_hits += $1; + } else { + foreach (split /\000/o, $prefixes) { + $_ = '' if $_ eq '^'; + print "DEBUG: add word $_$key
" if $debug > 2; + $pkgs{$_.$key}++; + } + } + last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0; + last if $too_many_hits or keys %pkgs >= 100; + } + + my $no_results = keys %pkgs; + if ($too_many_hits || ($no_results >= 100)) { + $too_many_hits += $no_results; + %pkgs = ( $keyword => 1 ); + } + foreach my $pkg (sort keys %pkgs) { + &$read_entry( \%packages, $pkg, \@results, $opts ); + } + } + return \@results; +} +sub do_fulltext_search { + my ($keword, $file, $mapping, $lookup, $read_entry, $opts) = @_; + my @results; + + my @lines; + my $regex; + if ($opts->{case_bool}) { + if ($opts->{exact}) { + $regex = qr/\b\Q$keyword\E\b/o; } else { - $data[2] = 'main'; + $regex = qr/\Q$keyword\E/o; } - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') - && $sections{$data[2]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; - push @results, [ $keyword, @data ]; + } else { + if ($exact) { + $regex = qr/\b\Q$keyword\E\b/io; + } else { + $regex = qr/\Q$keyword\E/io; + } + } + + open DESC, '<', "$DBDIR/$file" + or die "couldn't open $DBDIR/$file: $!"; + while () { + $_ =~ $regex or next; + print "DEBUG: Matched line $.
" if $debug > 2; + push @lines, $.; + } + close DESC; + + tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$lookup: $!"; + tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$mapping: $!"; + + my %tmp_results; + foreach my $l (@lines) { + my $result = $did2pkg{$l}; + foreach (split /\000/o, $result) { + my @data = split /\s/, $_, 3; + next unless $opts->{h_archs}{$data[2]}; + $tmp_results{$data[0]}++; } } + foreach my $pkg (keys %tmp_results) { + &$read_entry( \%packages, $pkg, \@results, $opts ); + } + return \@results; +} + +sub find_binaries { + my ($pkg, $suite) = @_; + + tie my %src2bin, 'DB_File', "$DBDIR/sources_packages.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't open $DBDIR/sources_packages.db: $!"; + + my $bins = $src2bin{$pkg} || ''; + my %bins; + foreach (split /\000/o, $bins) { + my @data = split /\s/, $_, 4; + + if ($data[0] eq $suite) { + $bins{$data[1]}++; + } + } + + return [ keys %bins ]; +} + +if ($searchon eq 'names') { + push @results, @{ do_names_search( $keyword, 'packages_small.db', + 'package_postfixes.db', + \&read_entry, \%opts ) }; +} elsif ($searchon eq 'sourcenames') { + push @results, @{ do_names_search( $keyword, 'sources_small.db', + 'source_postfixes.db', + \&read_src_entry, \%opts ) }; +} else { + push @results, @{ do_names_search( $keyword, 'packages_small.db', + 'package_postfixes.db', + \&read_entry, \%opts ) }; + push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt', + 'descriptions_packages.db', + 'packages_small.db', + \&read_entry, \%opts ) }; } my $st1 = new Benchmark; @@ -234,6 +366,10 @@ if ($format eq 'html') { } } +if ($too_many_hits) { + print "

Your search was too wide so we will only display exact matches. At least $too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.

"; +} + if (!@results) { if ($format eq 'html') { my $keyword_esc = uri_escape( $keyword ); @@ -278,15 +414,15 @@ my (%pkgs, %sect, %part, %desc, %binaries); unless ($search_on_sources) { foreach (@results) { - my ($pkg_t, $suite, $arch, $section, $priority, $version, $desc) = @$_; + my ($pkg_t, $suite, $arch, $section, $subsection, + $priority, $version, $desc) = @$_; my ($package) = $pkg_t =~ m/^(.+)/; # untaint $pkgs{$package}{$suite}{$version}{$arch} = 1; - $sect{$package}{$suite}{$version} = 'subsection'; + $sect{$package}{$suite}{$version} = $subsection; $part{$package}{$suite}{$version} = $section unless $section eq 'main'; $desc{$package}{$suite}{$version} = $desc; - } if ($format eq 'html') { @@ -317,43 +453,17 @@ unless ($search_on_sources) { } print "\n"; } - } elsif ($format eq 'xml') { - require RDF::Simple::Serialiser; - my $rdf = new RDF::Simple::Serialiser; - $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' ); - my @triples; - foreach my $pkg (sort keys %pkgs) { - foreach my $ver (@DISTS) { - if (exists $pkgs{$pkg}{$ver}) { - my @versions = version_sort keys %{$pkgs{$pkg}{$ver}}; - foreach my $version (@versions) { - my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version"; - push @triples, [ $id, 'debpkg:package', $pkg ]; - push @triples, [ $id, 'debpkg:version', $version ]; - push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ]; - push @triples, [ $id, 'debpkg:suite', $ver ]; - push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ]; - push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ]; - foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) { - push @triples, [ $id, 'debpkg:architecture', $arch ]; - } - } - } - } - } - - print $rdf->serialise(@triples); } } else { foreach (@results) { - my ($package, $suite, $section, $version, $binaries); + my ($package, $suite, $section, $subsection, $priority, + $version) = @$_; $pkgs{$package}{$suite} = $version; - $sect{$package}{$suite}{source} = 'subsection'; + $sect{$package}{$suite}{source} = $subsection; $part{$package}{$suite}{source} = $section unless $section eq 'main'; - $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ]; - + $binaries{$package}{$suite} = find_binaries( $package, $suite ); } if ($format eq 'html') { @@ -376,15 +486,8 @@ unless ($search_on_sources) { print "
Binary packages: "; my @bp_links; foreach my $bp (@{$binaries{$pkg}{$ver}}) { - my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || ''; - $sect =~ s,^(non-free|contrib)/,,; - $sect =~ s,^non-US.*$,non-US,,; - my $bp_link; - if ($sect) { - $bp_link = sprintf "%s", $ver, $sect, uri_escape( $bp ), $bp; - } else { - $bp_link = $bp; - } + my $bp_link = sprintf( "%s", + $ver, uri_escape( $bp ), $bp ); push @bp_links, $bp_link; } print join( ", ", @bp_links ); @@ -393,29 +496,6 @@ unless ($search_on_sources) { } print "\n"; } - } elsif ($format eq 'xml') { - require RDF::Simple::Serialiser; - my $rdf = new RDF::Simple::Serialiser; - $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' ); - my @triples; - foreach my $pkg (sort keys %pkgs) { - foreach my $ver (@SUITES) { - if (exists $pkgs{$pkg}{$ver}) { - my $id = "$ROOT/$ver/source/$pkg"; - - push @triples, [ $id, 'debpkg:package', $pkg ]; - push @triples, [ $id, 'debpkg:type', 'source' ]; - push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{source} ]; - push @triples, [ $id, 'debpkg:version', $pkgs{$pkg}{$ver} ]; - push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{source} || 'main' ]; - - foreach my $bp (@{$binaries{$pkg}{$ver}}) { - push @triples, [ $id, 'debpkg:binary', $bp ]; - } - } - } - } - print $rdf->serialise(@triples); } } @@ -430,9 +510,11 @@ sub printindexline { my $no_results = shift; my $index_line; - if ($no_results > $results_per_page) { + if ($no_results > $opts{number}) { - $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results); + $index_line = prevlink($input,\%params)." | ". + indexline( $input, \%params, $no_results)." | ". + nextlink($input,\%params, $no_results); print "

$index_line

"; } @@ -442,10 +524,10 @@ sub multipageheader { my $no_results = shift; my ($start, $end); - if ($results_per_page =~ /^all$/i) { + if ($opts{number} =~ /^all$/i) { $start = 1; $end = $no_results; - $results_per_page = $no_results; + $opts{number} = $no_results; } else { $start = Packages::Search::start( \%params ); $end = Packages::Search::end( \%params ); @@ -465,7 +547,7 @@ sub multipageheader { print "

Results per page: "; my @resperpagelinks; for (50, 100, 200) { - if ($results_per_page == $_) { + if ($opts{number} == $_) { push @resperpagelinks, $_; } else { push @resperpagelinks, resperpagelink($input,\%params,$_); @@ -491,5 +573,11 @@ print < END +my $pete = new Benchmark; +my $petd = timediff($pete, $pet0); +print "Total page evaluation took ".timestr($petd)."
" + if $debug_allowed; print $input->end_html; } + +# vim: ts=8 sw=4