X-Git-Url: https://git.deb.at/?a=blobdiff_plain;f=cgi-bin%2Fsearch_packages.pl;h=b62821dd5a33a41639ba83cc1a859a6d40d2ffa2;hb=5182ae034dddba23fe956fbfc373829e6574b88e;hp=255cd566370ad54271338a983a916377bba1122f;hpb=33a759c9a42bbbfb25378ea5e3c3eabf97d57b23;p=deb%2Fpackages.git diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl index 255cd56..b62821d 100755 --- a/cgi-bin/search_packages.pl +++ b/cgi-bin/search_packages.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -wT -# +# $Id$ # search_packages.pl -- CGI interface to the Packages files on packages.debian.org # # Copyright (C) 1998 James Treacy @@ -31,7 +31,6 @@ my $HOME = "http://www.debian.org"; my $ROOT = ""; my $SEARCHPAGE = "http://packages.debian.org/"; my @SUITES = qw( oldstable stable testing unstable experimental ); -my @DISTS = @SUITES; my @SECTIONS = qw( main contrib non-free ); my @ARCHIVES = qw( us security installer ); my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64 @@ -45,7 +44,7 @@ $ENV{PATH} = "/bin:/usr/bin"; # Read in all the variables set by the form my $input; -if ($ARGV[0] eq 'php') { +if ($ARGV[0] && ($ARGV[0] eq 'php')) { $input = new CGI(\*STDIN); } else { $input = new CGI; @@ -56,12 +55,7 @@ my $pet0 = new Benchmark; my $debug_allowed = 1; my $debug = $debug_allowed && $input->param("debug"); $debug = 0 if not defined($debug); -$Search::Param::debug = 1 if $debug > 1; - -# If you want, just print out a list of all of the variables and exit. -print $input->header if $debug; -# print $input->dump; -# exit; +#$Packages::Search::debug = 1 if $debug > 1; if (my $path = $input->param('path')) { my @components = map { lc $_ } split /\//, $path; @@ -79,58 +73,103 @@ if (my $path = $input->param('path')) { } } -my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' }, +my ( $format, $keyword, $case, $subword, $exact, $searchon, + @suites, @sections, @archs ); + +my %params_def = ( keywords => { default => undef, + match => '^\s*([-+\@\w\/.:]+)\s*$', + var => \$keyword }, suite => { default => 'stable', match => '^(\w+)$', alias => 'version', array => ',', + var => \@suites, replace => { all => \@SUITES } }, - case => { default => 'insensitive', match => '^(\w+)$' }, - official => { default => 0, match => '^(\w+)$' }, - use_cache => { default => 1, match => '^(\w+)$' }, - subword => { default => 0, match => '^(\w+)$' }, - exact => { default => undef, match => '^(\w+)$' }, - searchon => { default => 'all', match => '^(\w+)$' }, + case => { default => 'insensitive', match => '^(\w+)$', + var => \$case }, +# official => { default => 0, match => '^(\w+)$' }, +# use_cache => { default => 1, match => '^(\w+)$' }, + subword => { default => 0, match => '^(\w+)$', + var => \$subword }, + exact => { default => undef, match => '^(\w+)$', + var => \$exact }, + searchon => { default => 'all', match => '^(\w+)$', + var => \$searchon }, section => { default => 'all', match => '^([\w-]+)$', alias => 'release', array => ',', + var => \@sections, replace => { all => \@SECTIONS } }, arch => { default => 'any', match => '^(\w+)$', - array => ',', replace => + array => ',', var => \@archs, replace => { any => \@ARCHITECTURES } }, archive => { default => 'all', match => '^(\w+)$', array => ',', replace => { all => \@ARCHIVES } }, - format => { default => 'html', match => '^(\w+)$' }, + format => { default => 'html', match => '^(\w+)$', + var => \$format }, ); -my %params = Packages::Search::parse_params( $input, \%params_def ); +my %opts; +my %params = Packages::Search::parse_params( $input, \%params_def, \%opts ); -my $format = $params{values}{format}{final}; #XXX: Don't use alternative output formats yet $format = 'html'; - if ($format eq 'html') { print $input->header; -} elsif ($format eq 'xml') { -# print $input->header( -type=>'application/rdf+xml' ); - print $input->header( -type=>'text/plain' ); +} + +my (@errors, @debug, @msgs, @hints); +sub error { + push @errors, $_[0]; +} +sub hint { + push @hints, $_[0]; +} +sub debug { + my $lvl = $_[1] || 0; + push(@debug, $_[0]) if $debug > $lvl; +} +sub msg { + push @msgs, $_[0]; +} +sub print_errors { + return unless @errors; + print '
'; + foreach (@errors) { + print "

$_

"; + } + print '
'; +} +sub print_debug { + return unless $debug && @debug; + print '
'; + print '

Debugging:

';
+    foreach (@debug) {
+	print "$_\n";
+    }
+    print '
'; + +} +sub print_hints { + return unless @hints; + print '
'; + foreach (@hints) { + print "

$_

"; + } + print '
'; +} +sub print_msgs { + foreach (@msgs) { + print "

$_

"; + } } if ($params{errors}{keywords}) { - print "Error: keyword not valid or missing" if $format eq 'html'; - exit 0; + error( "Error: keyword not valid or missing" ); } -my $keyword = $params{values}{keywords}{final}; -my @suites = @{$params{values}{suite}{final}}; -my $official = $params{values}{official}{final}; -my $use_cache = $params{values}{use_cache}{final}; -my $case = $params{values}{case}{final}; + my $case_bool = ( $case !~ /insensitive/ ); -my $subword = $params{values}{subword}{final}; -my $exact = $params{values}{exact}{final}; $exact = !$subword unless defined $exact; -my $searchon = $params{values}{searchon}{final}; -my @sections = @{$params{values}{section}{final}}; -my @archs = @{$params{values}{arch}{final}}; -my $page = $params{values}{page}{final}; -my $results_per_page = $params{values}{number}{final}; +$opts{h_suites} = { map { $_ => 1 } @suites }; +$opts{h_sections} = { map { $_ => 1 } @sections }; +$opts{h_archs} = { map { $_ => 1 } @archs }; # for URL construction my $suites_param = join ',', @{$params{values}{suite}{no_replace}}; @@ -145,36 +184,16 @@ my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_repl my $archs_enc = encode_entities join ', ', @{$params{values}{arch}{no_replace}}; my $pet1 = new Benchmark; my $petd = timediff($pet1, $pet0); -print "DEBUG: Parameter evaluation took ".timestr($petd)."
" if $debug; - -if ($format eq 'html') { -print Packages::HTML::header( title => 'Package Search Results' , - lang => 'en', - title_tag => 'Debian Package Search Results', - print_title_above => 1, - print_search_field => 'packages', - search_field_values => { - keywords => $keyword_enc, - searchon => $searchon, - arch => $archs_enc, - suite => $suites_enc, - section => $sections_enc, - subword => $subword, - exact => $exact, - case => $case, - }, - ); -} +debug( "Parameter evaluation took ".timestr($petd) ); # read the configuration my $topdir; if (!open (C, "../config.sh")) { - print "\nInternal Error: Cannot open configuration file.\n\n" -if $format eq 'html'; - exit 0; + error( "Internal Error: Cannot open configuration file." ); } while () { - $topdir = $1 if (/^\s*topdir="?(.*)"?\s*$/); + $topdir = $1 if /^\s*topdir="?(.*)"?\s*$/; + $ROOT = $1 if /^\s*root="?(.*)"?\s*$/; } close (C); @@ -188,48 +207,62 @@ if ($searchon eq 'sourcenames') { $search_on_sources = 1; } -my %suites = map { $_ => 1 } @suites; -my %sections = map { $_ => 1 } @sections; -my %archs = map { $_ => 1 } @archs; - -print "DEBUG: suites=@suites, sections=@sections, archs=@archs
" if $debug > 2; +sub print_header { + print Packages::HTML::header( title => 'Package Search Results' , + lang => 'en', + title_tag => 'Debian Package Search Results', + print_title_above => 1, + print_search_field => 'packages', + search_field_values => { + keywords => $keyword_enc, + searchon => $searchon, + arch => $archs_enc, + suite => $suites_enc, + section => $sections_enc, + subword => $subword, + exact => $exact, + case => $case, + }, + ); +} sub read_entry { - my ($hash, $key, $results) = @_; - my $result = $hash->{$key}; + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; foreach (split /\000/, $result) { my @data = split ( /\s/, $_, 7 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') - && $sections{$data[2]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + debug( "Considering entry ".join( ':', @data), 2); + if ($opts->{h_suites}{$data[0]} + && ($opts->{h_archs}{$data[1]} || $data[1] eq 'all') + && $opts->{h_sections}{$data[2]}) { + debug( "Using entry ".join( ':', @data), 2); push @$results, [ $key, @data ]; } } } sub read_src_entry { - my ($hash, $key, $results) = @_; - my $result = $hash->{$key}; - + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; foreach (split /\000/, $result) { my @data = split ( /\s/, $_, 5 ); - print "DEBUG: Considering entry ".join( ':', @data)."
" if $debug > 2; - if ($suites{$data[0]} && $sections{$data[1]}) { - print "DEBUG: Using entry ".join( ':', @data)."
" if $debug > 2; + debug( "Considering entry ".join( ':', @data), 2); + if ($opts->{h_suites}{$data[0]} && $opts->{h_sections}{$data[1]}) { + debug( "Using entry ".join( ':', @data), 2); push @$results, [ $key, @data ]; } } } sub do_names_search { - my ($keyword, $file, $postfix_file, $read_entry) = @_; + my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_; + my @results; - $keyword = lc $keyword unless $case_bool; + $keyword = lc $keyword unless $opts->{case_bool}; my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE or die "couldn't tie DB $DBDIR/$file: $!"; - if ($exact) { - &$read_entry( \%packages, $keyword, \@results ); + if ($opts->{exact}) { + &$read_entry( \%packages, $keyword, \@results, $opts ); } else { my ($key, $prefixes) = ($keyword, ''); my %pkgs; @@ -242,7 +275,7 @@ sub do_names_search { } else { foreach (split /\000/o, $prefixes) { $_ = '' if $_ eq '^'; - print "DEBUG: add word $_$key
" if $debug > 2; + debug( "add word $_$key", 2); $pkgs{$_.$key}++; } } @@ -256,65 +289,100 @@ sub do_names_search { %pkgs = ( $keyword => 1 ); } foreach my $pkg (sort keys %pkgs) { - &$read_entry( \%packages, $pkg, \@results ); + &$read_entry( \%packages, $pkg, \@results, $opts ); } } + return \@results; } - -if ($searchon eq 'names') { - do_names_search( $keyword, 'packages_small.db', - 'package_postfixes.db', \&read_entry ); -} elsif ($searchon eq 'sourcenames') { - do_names_search( $keyword, 'sources_small.db', - 'source_postfixes.db', \&read_src_entry ); -} else { +sub do_fulltext_search { + my ($keword, $file, $mapping, $lookup, $read_entry, $opts) = @_; + my @results; my @lines; my $regex; - if ($case_bool) { - if ($exact) { + if ($opts->{case_bool}) { + if ($opts->{exact}) { $regex = qr/\b\Q$keyword\E\b/o; } else { $regex = qr/\Q$keyword\E/o; } } else { - if ($exact) { + if ($opts->{exact}) { $regex = qr/\b\Q$keyword\E\b/io; } else { $regex = qr/\Q$keyword\E/io; } } - open DESC, '<', "$DBDIR/descriptions.txt" or die "couldn't open $DBDIR/descriptions.txt: $!"; + open DESC, '<', "$DBDIR/$file" + or die "couldn't open $DBDIR/$file: $!"; while () { $_ =~ $regex or next; - print "DEBUG: Matched line $.
" if $debug > 2; + debug( "Matched line $.", 2); push @lines, $.; } close DESC; - my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/packages_small.db: $!"; - my $obj = tie my %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!"; + tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$lookup: $!"; + tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/$mapping: $!"; my %tmp_results; foreach my $l (@lines) { my $result = $did2pkg{$l}; foreach (split /\000/o, $result) { my @data = split /\s/, $_, 3; - next unless $archs{$data[2]}; + next unless $opts->{h_archs}{$data[2]}; $tmp_results{$data[0]}++; } } foreach my $pkg (keys %tmp_results) { - read_entry( \%packages, $pkg, \@results ); + &$read_entry( \%packages, $pkg, \@results, $opts ); } + return \@results; +} + +sub find_binaries { + my ($pkg, $suite) = @_; + + tie my %src2bin, 'DB_File', "$DBDIR/sources_packages.db", O_RDONLY, 0666, $DB_BTREE + or die "couldn't open $DBDIR/sources_packages.db: $!"; + + my $bins = $src2bin{$pkg} || ''; + my %bins; + foreach (split /\000/o, $bins) { + my @data = split /\s/, $_, 4; + + if ($data[0] eq $suite) { + $bins{$data[1]}++; + } + } + + return [ keys %bins ]; +} + +if ($searchon eq 'names') { + push @results, @{ do_names_search( $keyword, 'packages_small.db', + 'package_postfixes.db', + \&read_entry, \%opts ) }; +} elsif ($searchon eq 'sourcenames') { + push @results, @{ do_names_search( $keyword, 'sources_small.db', + 'source_postfixes.db', + \&read_src_entry, \%opts ) }; +} else { + push @results, @{ do_names_search( $keyword, 'packages_small.db', + 'package_postfixes.db', + \&read_entry, \%opts ) }; + push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt', + 'descriptions_packages.db', + 'packages_small.db', + \&read_entry, \%opts ) }; } my $st1 = new Benchmark; my $std = timediff($st1, $st0); -print "DEBUG: Search took ".timestr($std)."
" if $debug; +debug( "Search took ".timestr($std) ); if ($format eq 'html') { my $suite_wording = $suites_enc eq "all" ? "all suites" @@ -326,15 +394,15 @@ if ($format eq 'html') { if (($searchon eq "names") || ($searchon eq 'sourcenames')) { my $source_wording = $search_on_sources ? "source " : ""; my $exact_wording = $exact ? "named" : "that names contain"; - print "

You have searched for ${source_wording}packages $exact_wording $keyword_enc in $suite_wording, $section_wording, and $arch_wording.

"; + msg( "You have searched for ${source_wording}packages $exact_wording $keyword_enc in $suite_wording, $section_wording, and $arch_wording." ); } else { my $exact_wording = $exact ? "" : " (including subword matching)"; - print "

You have searched for $keyword_enc in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording.

"; + msg( "You have searched for $keyword_enc in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording." ); } } if ($too_many_hits) { -print "

Your search was too wide so we will only display exact matches. At least $too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.

"; + error( "Your search was too wide so we will only display exact matches. At least $too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ); } if (!@results) { @@ -345,187 +413,138 @@ if (!@results) { if (($suites_enc eq 'all') && ($archs_enc eq 'any') && ($sections_enc eq 'all')) { - print "

Can't find that package.

\n"; + error( "Can't find that package." ); } else { - print "

Can't find that package, at least not in that suite ". - ( $search_on_sources ? "" : " and on that architecture" ). - ".

\n"; + error( "Can't find that package, at least not in that suite ". + ( $search_on_sources ? "" : " and on that architecture" ) ) } if ($exact) { - $printed = 1; - print "

You have searched only for exact matches of the package name. You can try to search for package names that contain your search string.

"; + hint( "You have searched only for exact matches of the package name. You can try to search for package names that contain your search string." ); } } else { if (($suites_enc eq 'all') && ($archs_enc eq 'any') && ($sections_enc eq 'all')) { - print "

Can't find that string.

\n"; + error( "Can't find that string." ); } else { - print "

Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc).

\n"; + error( "Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc)." ); } unless ($subword) { - $printed = 1; - print "

You have searched only for words exactly matching your keywords. You can try to search allowing subword matching.

"; + hint( "You have searched only for words exactly matching your keywords. You can try to search allowing subword matching." ); } } - print "

".( $printed ? "Or you" : "You" )." can try a different search on the Packages search page.

"; - - &printfooter; + hint( ( @hints ? "Or you" : "You" )." can try a different search on the Packages search page." ); + } - exit; } -my (%pkgs, %sect, %part, %desc, %binaries); +print_header; +print_msgs; +print_errors; +print_hints; +print_debug; +&print_results; +&printfooter; -unless ($search_on_sources) { - foreach (@results) { - my ($pkg_t, $suite, $arch, $section, $subsection, - $priority, $version, $desc) = @$_; - - my ($package) = $pkg_t =~ m/^(.+)/; # untaint - $pkgs{$package}{$suite}{$version}{$arch} = 1; - $sect{$package}{$suite}{$version} = $subsection; - $part{$package}{$suite}{$version} = $section unless $section eq 'main'; - - $desc{$package}{$suite}{$version} = $desc; +sub print_results { + return unless @results; - } + my (%pkgs, %sect, %part, %desc, %binaries); - if ($format eq 'html') { - my ($start, $end) = multipageheader( scalar keys %pkgs ); - my $count = 0; + unless ($search_on_sources) { + foreach (@results) { + my ($pkg_t, $suite, $arch, $section, $subsection, + $priority, $version, $desc) = @$_; - foreach my $pkg (sort keys %pkgs) { - $count++; - next if $count < $start or $count > $end; - printf "

Package %s

\n", $pkg; - print "\n"; + my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint + $pkgs{$pkg}{$suite}{$version}{$arch} = 1; + $sect{$pkg}{$suite}{$version} = $subsection; + $part{$pkg}{$suite}{$version} = $section + unless $section eq 'main'; + + $desc{$pkg}{$suite}{$version} = $desc; } - } elsif ($format eq 'xml') { - require RDF::Simple::Serialiser; - my $rdf = new RDF::Simple::Serialiser; - $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' ); - my @triples; - foreach my $pkg (sort keys %pkgs) { - foreach my $ver (@DISTS) { - if (exists $pkgs{$pkg}{$ver}) { - my @versions = version_sort keys %{$pkgs{$pkg}{$ver}}; - foreach my $version (@versions) { - my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version"; - push @triples, [ $id, 'debpkg:package', $pkg ]; - push @triples, [ $id, 'debpkg:version', $version ]; - push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ]; - push @triples, [ $id, 'debpkg:suite', $ver ]; - push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ]; - push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ]; - foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) { - push @triples, [ $id, 'debpkg:architecture', $arch ]; + + if ($format eq 'html') { + my ($start, $end) = multipageheader( scalar keys %pkgs ); + my $count = 0; + + foreach my $pkg (sort keys %pkgs) { + $count++; + next if $count < $start or $count > $end; + printf "

Package %s

\n", $pkg; + print "\n"; } } + } else { + foreach (@results) { + my ($pkg, $suite, $section, $subsection, $priority, + $version) = @$_; - print $rdf->serialise(@triples); - } -} else { - foreach (@results) { - my ($package, $suite, $section, $subsection, $priority, - $version, $binaries) = @$_; - - $pkgs{$package}{$suite} = $version; - $sect{$package}{$suite}{source} = $subsection; - $part{$package}{$suite}{source} = $section unless $section eq 'main'; + $pkgs{$pkg}{$suite} = $version; + $sect{$pkg}{$suite}{source} = $subsection; + $part{$pkg}{$suite}{source} = $section + unless $section eq 'main'; - $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ]; - } + $binaries{$pkg}{$suite} = find_binaries( $pkg, $suite ); + } - if ($format eq 'html') { - my ($start, $end) = multipageheader( scalar keys %pkgs ); - my $count = 0; - - foreach my $pkg (sort keys %pkgs) { - $count++; - next if ($count < $start) or ($count > $end); - printf "

Source package %s

\n", $pkg; - print "\n"; } } - print $rdf->serialise(@triples); } -} - -if ($format eq 'html') { - &printindexline( scalar keys %pkgs ); - &printfooter; + printindexline( scalar keys %pkgs ); } exit; @@ -534,7 +553,7 @@ sub printindexline { my $no_results = shift; my $index_line; - if ($no_results > $results_per_page) { + if ($no_results > $opts{number}) { $index_line = prevlink($input,\%params)." | ". indexline( $input, \%params, $no_results)." | ". @@ -548,10 +567,10 @@ sub multipageheader { my $no_results = shift; my ($start, $end); - if ($results_per_page =~ /^all$/i) { + if ($opts{number} =~ /^all$/i) { $start = 1; $end = $no_results; - $results_per_page = $no_results; + $opts{number} = $no_results; } else { $start = Packages::Search::start( \%params ); $end = Packages::Search::end( \%params ); @@ -571,7 +590,7 @@ sub multipageheader { print "

Results per page: "; my @resperpagelinks; for (50, 100, 200) { - if ($results_per_page == $_) { + if ($opts{number} == $_) { push @resperpagelinks, $_; } else { push @resperpagelinks, resperpagelink($input,\%params,$_); @@ -588,16 +607,15 @@ sub multipageheader { } sub printfooter { -print < - -


-

Packages search page

- -END + my $pete = new Benchmark; + my $petd = timediff($pete, $pet0); + print "Total page evaluation took ".timestr($petd)."
" + if $debug_allowed; -print $input->end_html; + my $trailer = Packages::HTML::trailer( $ROOT ); + $trailer =~ s/LAST_MODIFIED_DATE/gmtime()/e; #FIXME + print $trailer; } # vim: ts=8 sw=4