X-Git-Url: https://git.deb.at/w?a=blobdiff_plain;f=cgi-bin%2Fsearch_packages.pl;h=e7cb3b6ad32c358166aaa19fd2013ccdc112fd4c;hb=5ac2b90beab477b44b863b2aa895e8fb3fedcd07;hp=bc51c30f66c3ce0fbc0e479dca8ea1f5f28cbba7;hpb=e41d34b008ea4b4ad09b5eea457d7523c7ef2fa3;p=deb%2Fpackages.git
diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl
index bc51c30..e7cb3b6 100755
--- a/cgi-bin/search_packages.pl
+++ b/cgi-bin/search_packages.pl
@@ -1,112 +1,133 @@
#!/usr/bin/perl -wT
-#
+# $Id$
# search_packages.pl -- CGI interface to the Packages files on packages.debian.org
#
# Copyright (C) 1998 James Treacy
# Copyright (C) 2000, 2001 Josip Rodin
# Copyright (C) 2001 Adam Heath
# Copyright (C) 2004 Martin Schulze
-# Copyright (C) 2004 Frank Lichtenheld
+# Copyright (C) 2004-2006 Frank Lichtenheld
#
# use is allowed under the terms of the GNU Public License (GPL)
# see http://www.fsf.org/copyleft/gpl.html for a copy of the license
-require 5.001;
use strict;
+use lib '../lib';
use CGI qw( -oldstyle_urls );
-#use CGI::Carp qw( fatalsToBrowser );
+use CGI::Carp qw( fatalsToBrowser );
use POSIX;
use URI::Escape;
use HTML::Entities;
use DB_File;
use Benchmark;
-use lib "../lib";
-
use Deb::Versions;
+use Packages::Config qw( $DBDIR $ROOT $SEARCH_CGI $SEARCH_PAGE
+ @SUITES @SECTIONS @ARCHIVES @ARCHITECTURES );
+use Packages::CGI;
+use Packages::DB;
use Packages::Search qw( :all );
use Packages::HTML ();
-my $thisscript = "search_packages.pl";
-my $use_grep = 1;
-my $HOME = "http://www.debian.org";
-my $ROOT = "";
-my $SEARCHPAGE = "http://packages.debian.org/";
-my @SUITES = qw( oldstable stable testing unstable experimental );
-my @DISTS = @SUITES;
-my @SECTIONS = qw( main contrib non-free );
-my @ARCHIVES = qw( us security installer );
-my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
- kfreebsd-i386 mips mipsel powerpc s390 sparc );
+&Packages::CGI::reset;
$ENV{PATH} = "/bin:/usr/bin";
# Read in all the variables set by the form
-my $input = new CGI;
+my $input;
+if ($ARGV[0] && ($ARGV[0] eq 'php')) {
+ $input = new CGI(\*STDIN);
+} else {
+ $input = new CGI;
+}
my $pet0 = new Benchmark;
+my $tet0 = new Benchmark;
# use this to disable debugging in production mode completly
-my $debug_allowed = 0;
+my $debug_allowed = 1;
my $debug = $debug_allowed && $input->param("debug");
-$Search::Param::debug = 1 if $debug > 1;
+$debug = 0 if !defined($debug) || $debug !~ /^\d+$/o;
+$Packages::CGI::debug = $debug;
+
+&Packages::Config::init( '../' );
+&Packages::DB::init();
+
+if (my $path = $input->param('path')) {
+ my @components = map { lc $_ } split /\//, $path;
+
+ my %SUITES = map { $_ => 1 } @SUITES;
+ my %SECTIONS = map { $_ => 1 } @SECTIONS;
+ my %ARCHIVES = map { $_ => 1 } @ARCHIVES;
+ my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES;
+
+ foreach (@components) {
+ if ($SUITES{$_}) {
+ $input->param('suite', $_);
+ } elsif ($SECTIONS{$_}) {
+ $input->param('section', $_);
+ } elsif ($ARCHIVES{$_}) {
+ $input->param('archive', $_);
+ } elsif ($ARCHITECTURES{$_}) {
+ $input->param('arch', $_);
+ } elsif ($_ eq 'source') {
+ $input->param('searchon','sourcenames');
+ }
+ }
+}
-# If you want, just print out a list of all of the variables and exit.
-print $input->header if $debug;
-# print $input->dump;
-# exit;
+my ( $format, $keyword, $case, $subword, $exact, $searchon,
+ @suites, @sections, @archives, @archs );
-my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' },
- suite => { default => 'stable', match => '^(\w+)$',
+my %params_def = ( keywords => { default => undef,
+ match => '^\s*([-+\@\w\/.:]+)\s*$',
+ var => \$keyword },
+ suite => { default => 'stable', match => '^([\w-]+)$',
alias => 'version', array => ',',
+ var => \@suites,
replace => { all => \@SUITES } },
- case => { default => 'insensitive', match => '^(\w+)$' },
+ archive => { default => 'all', match => '^([\w-]+)$',
+ array => ',', var => \@archives,
+ replace => { all => \@ARCHIVES } },
+ case => { default => 'insensitive', match => '^(\w+)$',
+ var => \$case },
official => { default => 0, match => '^(\w+)$' },
- use_cache => { default => 1, match => '^(\w+)$' },
- subword => { default => 0, match => '^(\w+)$' },
- exact => { default => undef, match => '^(\w+)$' },
- searchon => { default => 'all', match => '^(\w+)$' },
+ subword => { default => 0, match => '^(\w+)$',
+ var => \$subword },
+ exact => { default => undef, match => '^(\w+)$',
+ var => \$exact },
+ searchon => { default => 'all', match => '^(\w+)$',
+ var => \$searchon },
section => { default => 'all', match => '^([\w-]+)$',
alias => 'release', array => ',',
+ var => \@sections,
replace => { all => \@SECTIONS } },
arch => { default => 'any', match => '^(\w+)$',
- array => ',', replace =>
+ array => ',', var => \@archs, replace =>
{ any => \@ARCHITECTURES } },
- archive => { default => 'all', match => '^(\w+)$',
- array => ',', replace =>
- { all => \@ARCHIVES } },
- format => { default => 'html', match => '^(\w+)$' },
+ format => { default => 'html', match => '^(\w+)$',
+ var => \$format },
);
-my %params = Packages::Search::parse_params( $input, \%params_def );
+my %opts;
+my %params = Packages::Search::parse_params( $input, \%params_def, \%opts );
-my $format = $params{values}{format}{final};
#XXX: Don't use alternative output formats yet
$format = 'html';
-
if ($format eq 'html') {
- print $input->header;
-} elsif ($format eq 'xml') {
-# print $input->header( -type=>'application/rdf+xml' );
- print $input->header( -type=>'text/plain' );
+ print $input->header( -charset => 'utf-8' );
}
if ($params{errors}{keywords}) {
- print "Error: keyword not valid or missing" if $format eq 'html';
- exit 0;
+ fatal_error( "keyword not valid or missing" );
+} elsif (length($keyword) < 2) {
+ fatal_error( "keyword too short (keywords need to have at least two characters)" );
}
-my $keyword = $params{values}{keywords}{final};
-my @suites = @{$params{values}{suite}{final}};
-my $official = $params{values}{official}{final};
-my $use_cache = $params{values}{use_cache}{final};
-my $case = $params{values}{case}{final};
+
my $case_bool = ( $case !~ /insensitive/ );
-my $subword = $params{values}{subword}{final};
-my $exact = $params{values}{exact}{final};
$exact = !$subword unless defined $exact;
-my $searchon = $params{values}{searchon}{final};
-my @sections = @{$params{values}{section}{final}};
-my @archs = @{$params{values}{arch}{final}};
-my $page = $params{values}{page}{final};
-my $results_per_page = $params{values}{number}{final};
+$opts{h_suites} = { map { $_ => 1 } @suites };
+$opts{h_sections} = { map { $_ => 1 } @sections };
+$opts{h_archives} = { map { $_ => 1 } @archives };
+$opts{h_archs} = { map { $_ => 1 } @archs };
# for URL construction
my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
@@ -114,216 +135,42 @@ my $sections_param = join ',', @{$params{values}{section}{no_replace}};
my $archs_param = join ',', @{$params{values}{arch}{no_replace}};
# for output
-my $keyword_enc = encode_entities $keyword;
+my $keyword_enc = encode_entities $keyword || '';
my $searchon_enc = encode_entities $searchon;
my $suites_enc = encode_entities join ', ', @{$params{values}{suite}{no_replace}};
my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_replace}};
my $archs_enc = encode_entities join ', ', @{$params{values}{arch}{no_replace}};
my $pet1 = new Benchmark;
my $petd = timediff($pet1, $pet0);
-print "DEBUG: Parameter evaluation took ".timestr($petd)." $file You have searched for ${source_wording}packages $exact_wording $keyword_enc in $suite_wording, $section_wording, and $arch_wording. You have searched for $keyword_enc in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording. Can't find that package. Can't find that package, at least not in that suite ".
- ( $search_on_sources ? "" : " and on that architecture" ).
- ". You have searched only for exact matches of the package name. You can try to search for package names that contain your search string. Can't find that string. Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc). You have searched only for words exactly matching your keywords. You can try to search allowing subword matching. ".( $printed ? "Or you" : "You" )." can try a different search on the Packages search page. $index_line Found $no_results matching packages,";
- if ($end == $start) {
- print " displaying package $end.
" if $debug;
-
-if ($format eq 'html') {
-print Packages::HTML::header( title => 'Package Search Results' ,
- lang => 'en',
- title_tag => 'Debian Package Search Results',
- print_title_above => 1,
- print_search_field => 'packages',
- search_field_values => {
- keywords => $keyword_enc,
- searchon => $searchon,
- arch => $archs_enc,
- suite => $suites_enc,
- section => $sections_enc,
- subword => $subword,
- exact => $exact,
- case => $case,
- },
- );
-}
-
-# read the configuration
-my $topdir;
-if (!open (C, "../config.sh")) {
- print "\nInternal Error: Cannot open configuration file.\n\n" if $format eq 'html';
- exit 0;
-}
-while ($cached
" if $debug;
-} else {
- my $searchkeyword = $keyword;
- my $grep_searchkeyword = $keyword;
- $searchkeyword =~ s/[.]/\\./;
- if (($searchon eq 'names') || ($searchon eq 'sourcenames')) {
- # asserting that all package names are lower case
- $searchkeyword = lc($searchkeyword) unless $case_bool;
- $case_bool = 1;
- $grep_searchkeyword = "^[^ ]*$searchkeyword" unless $exact;
- $searchkeyword = "^\\S*$searchkeyword" unless $exact;
- } else {
- $grep_searchkeyword = "\\(^$searchkeyword\\b\\|\\b$searchkeyword\\b\\)"
- if $subword != 1;
- $searchkeyword = "\\b$searchkeyword\\b"
- if $subword != 1;
- }
-
-# FIXME
-# check if the Packages files are there
-#my @files = glob ("$fdir/$file");
-#if ($#files == -1) {
-# XXX has to be updated for new architectures
-# if ($format eq 'html') {
-# if (($version eq "stable" and $arch =~ /^(hurd|sh)$/)
-# || ($version eq "oldstable" and $arch =~ /^amd64$/)) {
-# print "Error: the $arch architecture didn't exist in $version.
\n"
-# ."Please go back and choose a different distribution.\n";
-# } else {
-# print "Error: Packages/Sources file not found.
\n"
-# ."If the problem persists, please inform $ENV{SERVER_ADMIN}.\n";
-# printf "
"
- if $debug > 1;
-
- tie %packages, 'DB_File', $file, O_RDONLY
- or die "Couldn't open packages file $file: $!";
-
- if (my $data = $packages{$searchkeyword}) {
- print "DEBUG: Found result $data
"
- if $debug > 1;
- push @results, "$file:$data";
- }
- }
- } else {
- my $file;
- if ($search_on_sources) {
- $file = "$FLATDIR/$s/$sec/Sources.$archive";
- } else {
- $file = "$FLATDIR/$s/$sec/Packages-$a.$archive";
- }
- if (-f $file) {
- print "DEBUG: Use file $file
"
- if $debug > 1;
-
- # use_grep is currently way faster, though
- # I can't pinpoint exactly why, yet
- # most probably the perl regexes are
- # slow compared to the simpler grep
- # regexes
- unless ($use_grep) {
- open my $pkg_fh, '<', $file
- or die "Couldn't open packages file $file: $!";
-
- foreach (<$pkg_fh>) {
- if (/$searchkeyword/o) {
- print "DEBUG: Found result $_
"
- if $debug > 1;
+unless (@Packages::CGI::fatal_errors) {
- push @results, "$file:$_";
- }
- }
- } else {
- push @files, $file;
- }
- }
- }
- }
- }
- }
- }
-
- if ($use_grep) {
- if (@files) {
- my @grep = ( 'grep', '-H' );
- push @grep, '-i' unless $case_bool;
- push @grep, $grep_searchkeyword;
- push @grep, @files;
-
- print "DEBUG: starting grep command '".
- substr("@grep",0,100)."[...]'
" if $debug;
- open my $grep_out, '-|', @grep or
- die "grep failed: $!";
- @results = <$grep_out>;
- }
+ if ($searchon eq 'names') {
+ push @results, @{ do_names_search( $keyword, \%packages,
+ $p_obj,
+ \&read_entry, \%opts ) };
+ } elsif ($searchon eq 'sourcenames') {
+ push @results, @{ do_names_search( $keyword, \%sources,
+ $sp_obj,
+ \&read_src_entry, \%opts ) };
+ } else {
+ push @results, @{ do_names_search( $keyword, \%packages,
+ $p_obj,
+ \&read_entry, \%opts ) };
+ push @results, @{ do_fulltext_search( $keyword, "$DBDIR/descriptions.txt",
+ \%did2pkg,
+ \%packages,
+ \&read_entry, \%opts ) };
}
-
- $cache{$cache_key} = join "", @results;
}
my $st1 = new Benchmark;
my $std = timediff($st1, $st0);
-print "DEBUG: Search took ".timestr($std)."
" if $debug;
+debug( "Search took ".timestr($std) );
if ($format eq 'html') {
my $suite_wording = $suites_enc eq "all" ? "all suites"
@@ -333,16 +180,20 @@ if ($format eq 'html') {
my $arch_wording = $archs_enc eq 'any' ? "all architectures"
: "architecture(s) $archs_enc";
if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
- my $source_wording = $search_on_sources ? "source " : "";
+ my $source_wording = ( $searchon eq 'sourcenames' ) ? "source " : "";
my $exact_wording = $exact ? "named" : "that names contain";
- print "Package %s
\n", $pkg;
- print "\n";
- foreach $ver (@SUITES) {
- if (exists $pkgs{$pkg}{$ver}) {
- my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
- my $part_str = "";
- if ($part{$pkg}{$ver}{$versions[0]}) {
- $part_str = "[$part{$pkg}{$ver}{$versions[0]}]";
- }
- printf "
\n";
}
- } elsif ($format eq 'xml') {
- require RDF::Simple::Serialiser;
- my $rdf = new RDF::Simple::Serialiser;
- $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
- my @triples;
- foreach my $pkg (sort keys %pkgs) {
- foreach $ver (@DISTS) {
- if (exists $pkgs{$pkg}{$ver}) {
- my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
- foreach my $version (@versions) {
- my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version";
- push @triples, [ $id, 'debpkg:package', $pkg ];
- push @triples, [ $id, 'debpkg:version', $version ];
- push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ];
- push @triples, [ $id, 'debpkg:suite', $ver ];
- push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ];
- push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ];
- foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) {
- push @triples, [ $id, 'debpkg:architecture', $arch ];
+
+my @pkgs = sort(keys %pkgs, keys %provided_by);
+ if ($opts{format} eq 'html') {
+ my ($start, $end) = multipageheader( $input, scalar @pkgs, \%opts );
+ my $count = 0;
+
+ foreach my $pkg (@pkgs) {
+ $count++;
+ next if $count < $start or $count > $end;
+ printf "
%s: %s\n",
- $v, join (" ", (sort keys %{$pkgs{$pkg}{$ver}{$v}}) );
- }
- print "Package %s
\n", $pkg;
+ print "\n";
+ foreach my $suite (@SUITES) {
+ foreach my $archive (@ARCHIVES) {
+ my $path = $suite.(($archive ne 'us')?"/$archive":'');
+ if (exists $pkgs{$pkg}{$suite}{$archive}) {
+ my @versions = version_sort keys %{$pkgs{$pkg}{$suite}{$archive}};
+ my $origin_str = "";
+ if ($sect{$pkg}{$suite}{$archive}{$versions[0]}) {
+ $origin_str .= " [$sect{$pkg}{$suite}{$versions[0]}]";
+ }
+ printf "
\n";
}
}
+ } else {
+ foreach (@results) {
+ my ($pkg, $archive, $suite, $section, $subsection, $priority,
+ $version) = @$_;
- print $rdf->serialise(@triples);
- }
-} else {
- foreach my $line (@results) {
- chomp($line);
- @colon = split (/:/, $line);
- ($package, $section, $ver, $binaries) = split (/ /, $#colon >1 ? $colon[1].":".$colon[2]:$colon[1], 4);
- $section =~ s,^(non-free|contrib)/,,;
- $section =~ s,^non-US.*$,non-US,,;
- $colon[0] =~ m,.*/([^/]+)/([^/]+)/Sources\.,; #$1=stable, $2=main
-
- my ($suite, $part) = ($1, $2);
- $pkgs{$package}{$suite} = $ver;
- $sect{$package}{$suite}{source} = $section;
- $part{$package}{$suite}{source} = $part unless $part eq 'main';
-
- $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ];
-
- }
+ $pkgs{$pkg}{$suite}{$archive} = $version;
+ $subsect{$pkg}{$suite}{$archive}{source} = $subsection;
+ $sect{$pkg}{$suite}{$archive}{source} = $section
+ unless $section eq 'main';
- if ($format eq 'html') {
- my ($start, $end) = multipageheader( scalar keys %pkgs );
- my $count = 0;
-
- foreach my $pkg (sort keys %pkgs) {
- $count++;
- next if ($count < $start) or ($count > $end);
- printf "
%s: %s\n",
+ $v, join (" ", (sort keys %{$pkgs{$pkg}{$suite}{$archive}{$v}}) );
+ }
+ if (my $provided_by = $provided_by{$pkg}{$suite}{$archive}) {
+ print '
also provided by: ',
+ join( ', ', map { "$_" } @$provided_by);
+ }
+ print "
",
+ $path, $pkg, $path;
+ print 'provided by: ',
+ join( ', ', map { "$_" } @$provided_by);
}
}
}
+ print "Source package %s
\n", $pkg;
- print "\n";
- foreach $ver (@DISTS) {
- if (exists $pkgs{$pkg}{$ver}) {
- my $part_str = "";
- if ($part{$pkg}{$ver}{source}) {
- $part_str = "[$part{$pkg}{$ver}{source}]";
- }
- printf "
\n";
+ $binaries{$pkg}{$suite}{$archive} = find_binaries( $pkg, $archive, $suite, \%src2bin );
}
- } elsif ($format eq 'xml') {
- require RDF::Simple::Serialiser;
- my $rdf = new RDF::Simple::Serialiser;
- $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
- my @triples;
- foreach my $pkg (sort keys %pkgs) {
- foreach $ver (@DISTS) {
- if (exists $pkgs{$pkg}{$ver}) {
- my $id = "$ROOT/$ver/source/$pkg";
- push @triples, [ $id, 'debpkg:package', $pkg ];
- push @triples, [ $id, 'debpkg:type', 'source' ];
- push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{source} ];
- push @triples, [ $id, 'debpkg:version', $pkgs{$pkg}{$ver} ];
- push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{source} || 'main' ];
-
- foreach my $bp (@{$binaries{$pkg}{$ver}}) {
- push @triples, [ $id, 'debpkg:binary', $bp ];
+ if ($opts{format} eq 'html') {
+ my ($start, $end) = multipageheader( $input, scalar keys %pkgs, \%opts );
+ my $count = 0;
+
+ foreach my $pkg (sort keys %pkgs) {
+ $count++;
+ next if ($count < $start) or ($count > $end);
+ printf "
Binary packages: ";
- my @bp_links;
- foreach my $bp (@{$binaries{$pkg}{$ver}}) {
- my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || '';
- $sect =~ s,^(non-free|contrib)/,,;
- $sect =~ s,^non-US.*$,non-US,,;
- my $bp_link;
- if ($sect) {
- $bp_link = sprintf "%s", $ver, $sect, uri_escape( $bp ), $bp;
- } else {
- $bp_link = $bp;
- }
- push @bp_links, $bp_link;
- }
- print join( ", ", @bp_links );
- print "Source package %s
\n", $pkg;
+ print "\n";
+ foreach my $suite (@SUITES) {
+ foreach my $archive (@ARCHIVES) {
+ if (exists $pkgs{$pkg}{$suite}{$archive}) {
+ my $origin_str = "";
+ if ($sect{$pkg}{$suite}{$archive}{source}) {
+ $origin_str .= " [$sect{$pkg}{$suite}{$archive}{source}]";
+ }
+ printf( "
\n";
}
}
- print $rdf->serialise(@triples);
}
+ printindexline( $input, scalar keys %pkgs, \%opts );
}
+#print_results(\@results, \%opts) if @results;;
+my $tet1 = new Benchmark;
+my $tetd = timediff($tet1, $tet0);
+print "Total page evaluation took ".timestr($tetd)."
Binary packages: ";
+ my @bp_links;
+ foreach my $bp (@{$binaries{$pkg}{$suite}{$archive}}) {
+ my $bp_link = sprintf( "%s",
+ $suite.(($archive ne 'us')?"/$archive":''), uri_escape( $bp ), $bp );
+ push @bp_links, $bp_link;
+ }
+ print join( ", ", @bp_links );
+ print "
"
+ if $debug_allowed;
-if ($format eq 'html') {
- &printindexline( scalar keys %pkgs );
- &printfooter;
-}
+my $trailer = Packages::HTML::trailer( $ROOT );
+$trailer =~ s/LAST_MODIFIED_DATE/gmtime()/e; #FIXME
+print $trailer;
-exit;
-
-sub printindexline {
- my $no_results = shift;
-
- my $index_line;
- if ($no_results > $results_per_page) {
-
- $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results);
-
- print "
Results per page: "; - my @resperpagelinks; - for (50, 100, 200) { - if ($results_per_page == $_) { - push @resperpagelinks, $_; - } else { - push @resperpagelinks, resperpagelink($input,\%params,$_); - } - } - if ($params{values}{number}{final} =~ /^all$/i) { - push @resperpagelinks, "all"; - } else { - push @resperpagelinks, resperpagelink($input, \%params,"all"); - } - print join( " | ", @resperpagelinks )."
"; - } - return ( $start, $end ); -} - -sub printfooter { -print <