From f49363dc272472174dd57c663c2688c33af927cd Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Fri, 3 Feb 2006 20:03:19 +0000 Subject: [PATCH] These changes make the search_packages.pl script mod_perl ready and try to make some optimisations for this (e.g. tie'ing the Databases only once) --- cgi-bin/search_packages.pl | 492 +++++++++++-------------------------- conf/apache.conf | 17 +- config.sh | 12 +- lib/Packages/HTML.pm | 28 +-- lib/Packages/Search.pm | 230 +++++++++++++++-- 5 files changed, 380 insertions(+), 399 deletions(-) diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl index b62821d..25b0e13 100755 --- a/cgi-bin/search_packages.pl +++ b/cgi-bin/search_packages.pl @@ -20,25 +20,12 @@ use HTML::Entities; use DB_File; use Benchmark; -use lib "../lib"; - use Deb::Versions; +use Packages::CGI; use Packages::Search qw( :all ); use Packages::HTML (); -my $thisscript = $Packages::HTML::SEARCH_CGI; -my $HOME = "http://www.debian.org"; -my $ROOT = ""; -my $SEARCHPAGE = "http://packages.debian.org/"; -my @SUITES = qw( oldstable stable testing unstable experimental ); -my @SECTIONS = qw( main contrib non-free ); -my @ARCHIVES = qw( us security installer ); -my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64 - kfreebsd-i386 mips mipsel powerpc s390 sparc ); -my %SUITES = map { $_ => 1 } @SUITES; -my %SECTIONS = map { $_ => 1 } @SECTIONS; -my %ARCHIVES = map { $_ => 1 } @ARCHIVES; -my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES; +&Packages::CGI::reset; $ENV{PATH} = "/bin:/usr/bin"; @@ -51,22 +38,60 @@ if ($ARGV[0] && ($ARGV[0] eq 'php')) { } my $pet0 = new Benchmark; +my $tet0 = new Benchmark; # use this to disable debugging in production mode completly my $debug_allowed = 1; my $debug = $debug_allowed && $input->param("debug"); -$debug = 0 if not defined($debug); -#$Packages::Search::debug = 1 if $debug > 1; +$debug = 0 if !defined($debug) || $debug !~ /^\d+$/o; +$Packages::CGI::debug = $debug; + +# read the configuration +our $config_read_time ||= 0; +our $db_read_time ||= 0; +our $topdir; +our $ROOT; +our @SUITES; +our @SECTIONS; +our @ARCHITECTURES; + +# FIXME: move to own module +my $modtime = (stat( "../config.sh" ))[9]; +if ($modtime > $config_read_time) { + if (!open (C, '<', "../config.sh")) { + error( "Internal Error: Cannot open configuration file." ); + } + while () { + chomp; + $topdir = $1 if /^\s*topdir="?([^\"]*)"?\s*$/o; + $ROOT = $1 if /^\s*root="?([^\"]*)"?\s*$/o; + $Packages::HTML::HOME = $1 if /^\s*home="?([^\"]*)"?\s*$/o; + $Packages::HTML::SEARCH_CGI = $1 if /^\s*searchcgi="?([^\"]*)"?\s*$/o; + $Packages::HTML::SEARCH_PAGE = $1 if /^\s*searchpage="?([^\"]*)"?\s*$/o; + $Packages::HTML::WEBMASTER_MAIL = $1 if /^\s*webmaster="?([^\"]*)"?\s*$/o; + $Packages::HTML::CONTACT_MAIL = $1 if /^\s*contact="?([^\"]*)"?\s*$/o; + @SUITES = split(/\s+/, $1) if /^\s*suites="?([^\"]*)"?\s*$/o; + @SECTIONS = split(/\s+/, $1) if /^\s*sections="?([^\"]*)"?\s*$/o; + @ARCHITECTURES = split(/\s+/, $1) if /^\s*architectures="?([^\"]*)"?\s*$/o; + } + close (C); + debug( "read config ($modtime > $config_read_time)" ); + $config_read_time = $modtime; +} +my $DBDIR = $topdir . "/files/db"; +my $thisscript = $Packages::HTML::SEARCH_CGI; if (my $path = $input->param('path')) { my @components = map { lc $_ } split /\//, $path; + my %SUITES = map { $_ => 1 } @SUITES; + my %SECTIONS = map { $_ => 1 } @SECTIONS; + my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES; + foreach (@components) { if ($SUITES{$_}) { $input->param('suite', $_); } elsif ($SECTIONS{$_}) { $input->param('section', $_); - } elsif ($ARCHIVES{$_}) { - $input->param('archive', $_); }elsif ($ARCHITECTURES{$_}) { $input->param('arch', $_); } @@ -85,8 +110,7 @@ my %params_def = ( keywords => { default => undef, replace => { all => \@SUITES } }, case => { default => 'insensitive', match => '^(\w+)$', var => \$case }, -# official => { default => 0, match => '^(\w+)$' }, -# use_cache => { default => 1, match => '^(\w+)$' }, + official => { default => 0, match => '^(\w+)$' }, subword => { default => 0, match => '^(\w+)$', var => \$subword }, exact => { default => undef, match => '^(\w+)$', @@ -100,9 +124,6 @@ my %params_def = ( keywords => { default => undef, arch => { default => 'any', match => '^(\w+)$', array => ',', var => \@archs, replace => { any => \@ARCHITECTURES } }, - archive => { default => 'all', match => '^(\w+)$', - array => ',', replace => - { all => \@ARCHIVES } }, format => { default => 'html', match => '^(\w+)$', var => \$format }, ); @@ -115,54 +136,10 @@ if ($format eq 'html') { print $input->header; } -my (@errors, @debug, @msgs, @hints); -sub error { - push @errors, $_[0]; -} -sub hint { - push @hints, $_[0]; -} -sub debug { - my $lvl = $_[1] || 0; - push(@debug, $_[0]) if $debug > $lvl; -} -sub msg { - push @msgs, $_[0]; -} -sub print_errors { - return unless @errors; - print '
'; - foreach (@errors) { - print "

$_

"; - } - print '
'; -} -sub print_debug { - return unless $debug && @debug; - print '
'; - print '

Debugging:

';
-    foreach (@debug) {
-	print "$_\n";
-    }
-    print '
'; - -} -sub print_hints { - return unless @hints; - print '
'; - foreach (@hints) { - print "

$_

"; - } - print '
'; -} -sub print_msgs { - foreach (@msgs) { - print "

$_

"; - } -} - if ($params{errors}{keywords}) { - error( "Error: keyword not valid or missing" ); + fatal_error( "keyword not valid or missing" ); +} elsif (length($keyword) < 2) { + fatal_error( "keyword too short (keywords need to have at least two characters)" ); } my $case_bool = ( $case !~ /insensitive/ ); @@ -177,7 +154,7 @@ my $sections_param = join ',', @{$params{values}{section}{no_replace}}; my $archs_param = join ',', @{$params{values}{arch}{no_replace}}; # for output -my $keyword_enc = encode_entities $keyword; +my $keyword_enc = encode_entities $keyword || ''; my $searchon_enc = encode_entities $searchon; my $suites_enc = encode_entities join ', ', @{$params{values}{suite}{no_replace}}; my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_replace}}; @@ -186,198 +163,56 @@ my $pet1 = new Benchmark; my $petd = timediff($pet1, $pet0); debug( "Parameter evaluation took ".timestr($petd) ); -# read the configuration -my $topdir; -if (!open (C, "../config.sh")) { - error( "Internal Error: Cannot open configuration file." ); -} -while () { - $topdir = $1 if /^\s*topdir="?(.*)"?\s*$/; - $ROOT = $1 if /^\s*root="?(.*)"?\s*$/; -} -close (C); - -my $DBDIR = $topdir . "/files/db"; -my $search_on_sources = 0; - my $st0 = new Benchmark; my @results; -my $too_many_hits; -if ($searchon eq 'sourcenames') { - $search_on_sources = 1; -} -sub print_header { - print Packages::HTML::header( title => 'Package Search Results' , - lang => 'en', - title_tag => 'Debian Package Search Results', - print_title_above => 1, - print_search_field => 'packages', - search_field_values => { - keywords => $keyword_enc, - searchon => $searchon, - arch => $archs_enc, - suite => $suites_enc, - section => $sections_enc, - subword => $subword, - exact => $exact, - case => $case, - }, - ); -} - -sub read_entry { - my ($hash, $key, $results, $opts) = @_; - my $result = $hash->{$key} || ''; - foreach (split /\000/, $result) { - my @data = split ( /\s/, $_, 7 ); - debug( "Considering entry ".join( ':', @data), 2); - if ($opts->{h_suites}{$data[0]} - && ($opts->{h_archs}{$data[1]} || $data[1] eq 'all') - && $opts->{h_sections}{$data[2]}) { - debug( "Using entry ".join( ':', @data), 2); - push @$results, [ $key, @data ]; - } - } -} -sub read_src_entry { - my ($hash, $key, $results, $opts) = @_; - my $result = $hash->{$key} || ''; - foreach (split /\000/, $result) { - my @data = split ( /\s/, $_, 5 ); - debug( "Considering entry ".join( ':', @data), 2); - if ($opts->{h_suites}{$data[0]} && $opts->{h_sections}{$data[1]}) { - debug( "Using entry ".join( ':', @data), 2); - push @$results, [ $key, @data ]; - } - } -} -sub do_names_search { - my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_; - my @results; - - $keyword = lc $keyword unless $opts->{case_bool}; - - my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/$file: $!"; - - if ($opts->{exact}) { - &$read_entry( \%packages, $keyword, \@results, $opts ); - } else { - my ($key, $prefixes) = ($keyword, ''); - my %pkgs; - my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie postfix db $DBDIR/$postfix_file: $!"; - $p_obj->seq( $key, $prefixes, R_CURSOR ); - while (index($key, $keyword) >= 0) { - if ($prefixes =~ /^\001(\d+)/o) { - $too_many_hits += $1; - } else { - foreach (split /\000/o, $prefixes) { - $_ = '' if $_ eq '^'; - debug( "add word $_$key", 2); - $pkgs{$_.$key}++; - } - } - last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0; - last if $too_many_hits or keys %pkgs >= 100; - } - - my $no_results = keys %pkgs; - if ($too_many_hits || ($no_results >= 100)) { - $too_many_hits += $no_results; - %pkgs = ( $keyword => 1 ); - } - foreach my $pkg (sort keys %pkgs) { - &$read_entry( \%packages, $pkg, \@results, $opts ); - } - } - return \@results; -} -sub do_fulltext_search { - my ($keword, $file, $mapping, $lookup, $read_entry, $opts) = @_; - my @results; - - my @lines; - my $regex; - if ($opts->{case_bool}) { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/o; - } else { - $regex = qr/\Q$keyword\E/o; - } - } else { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/io; - } else { - $regex = qr/\Q$keyword\E/io; - } - } - - open DESC, '<', "$DBDIR/$file" - or die "couldn't open $DBDIR/$file: $!"; - while () { - $_ =~ $regex or next; - debug( "Matched line $.", 2); - push @lines, $.; - } - close DESC; - - tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/$lookup: $!"; - tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE - or die "couldn't tie DB $DBDIR/$mapping: $!"; - - my %tmp_results; - foreach my $l (@lines) { - my $result = $did2pkg{$l}; - foreach (split /\000/o, $result) { - my @data = split /\s/, $_, 3; - next unless $opts->{h_archs}{$data[2]}; - $tmp_results{$data[0]}++; - } - } - foreach my $pkg (keys %tmp_results) { - &$read_entry( \%packages, $pkg, \@results, $opts ); +our ($obj, $s_obj, $p_obj, $sp_obj, + %packages, %sources, %postf, %spostf, %src2bin, %did2pkg ); + +unless (@Packages::CGI::fatal_errors) { + + my $dbmodtime = (stat("$DBDIR/packages_small.db"))[9]; + if ($dbmodtime > $db_read_time) { + $obj = tie %packages, 'DB_File', "$DBDIR/packages_small.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/packages_small.db: $!"; + $s_obj = tie %sources, 'DB_File', "$DBDIR/sources_small.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/sources_small.db: $!"; + $p_obj = tie %postf, 'DB_File', "$DBDIR/package_postfixes.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie postfix db $DBDIR/package_postfixes.db: $!"; + $sp_obj = tie %spostf, 'DB_File', "$DBDIR/source_postfixes.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie postfix db $DBDIR/source_postfixes.db: $!"; + tie %src2bin, 'DB_File', "$DBDIR/sources_packages.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't open $DBDIR/sources_packages.db: $!"; + tie %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db", + O_RDONLY, 0666, $DB_BTREE + or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!"; + + debug( "tied databases ($dbmodtime > $db_read_time)" ); + $db_read_time = $dbmodtime; } - return \@results; -} - -sub find_binaries { - my ($pkg, $suite) = @_; - - tie my %src2bin, 'DB_File', "$DBDIR/sources_packages.db", O_RDONLY, 0666, $DB_BTREE - or die "couldn't open $DBDIR/sources_packages.db: $!"; - my $bins = $src2bin{$pkg} || ''; - my %bins; - foreach (split /\000/o, $bins) { - my @data = split /\s/, $_, 4; - - if ($data[0] eq $suite) { - $bins{$data[1]}++; - } + if ($searchon eq 'names') { + push @results, @{ do_names_search( $keyword, \%packages, + $p_obj, + \&read_entry, \%opts ) }; + } elsif ($searchon eq 'sourcenames') { + push @results, @{ do_names_search( $keyword, \%sources, + $sp_obj, + \&read_src_entry, \%opts ) }; + } else { + push @results, @{ do_names_search( $keyword, \%packages, + $p_obj, + \&read_entry, \%opts ) }; + push @results, @{ do_fulltext_search( $keyword, "$DBDIR/descriptions.txt", + \%did2pkg, + \%packages, + \&read_entry, \%opts ) }; } - - return [ keys %bins ]; -} - -if ($searchon eq 'names') { - push @results, @{ do_names_search( $keyword, 'packages_small.db', - 'package_postfixes.db', - \&read_entry, \%opts ) }; -} elsif ($searchon eq 'sourcenames') { - push @results, @{ do_names_search( $keyword, 'sources_small.db', - 'source_postfixes.db', - \&read_src_entry, \%opts ) }; -} else { - push @results, @{ do_names_search( $keyword, 'packages_small.db', - 'package_postfixes.db', - \&read_entry, \%opts ) }; - push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt', - 'descriptions_packages.db', - 'packages_small.db', - \&read_entry, \%opts ) }; } my $st1 = new Benchmark; @@ -392,7 +227,7 @@ if ($format eq 'html') { my $arch_wording = $archs_enc eq 'any' ? "all architectures" : "architecture(s) $archs_enc"; if (($searchon eq "names") || ($searchon eq 'sourcenames')) { - my $source_wording = $search_on_sources ? "source " : ""; + my $source_wording = ( $searchon eq 'sourcenames' ) ? "source " : ""; my $exact_wording = $exact ? "named" : "that names contain"; msg( "You have searched for ${source_wording}packages $exact_wording $keyword_enc in $suite_wording, $section_wording, and $arch_wording." ); } else { @@ -401,11 +236,11 @@ if ($format eq 'html') { } } -if ($too_many_hits) { - error( "Your search was too wide so we will only display exact matches. At least $too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ); +if ($Packages::Search::too_many_hits) { + error( "Your search was too wide so we will only display exact matches. At least $Packages::Search::too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ); } -if (!@results) { +if (!@Packages::CGI::fatal_errors && !@results) { if ($format eq 'html') { my $keyword_esc = uri_escape( $keyword ); my $printed = 0; @@ -416,10 +251,11 @@ if (!@results) { error( "Can't find that package." ); } else { error( "Can't find that package, at least not in that suite ". - ( $search_on_sources ? "" : " and on that architecture" ) ) + ( ( $searchon eq 'sourcenames' ) ? "" : " and on that architecture" ) ) } if ($exact) { + $printed++; hint( "You have searched only for exact matches of the package name. You can try to search for package names that contain your search string." ); } } else { @@ -432,28 +268,40 @@ if (!@results) { } unless ($subword) { + $printed++; hint( "You have searched only for words exactly matching your keywords. You can try to search allowing subword matching." ); } } - hint( ( @hints ? "Or you" : "You" )." can try a different search on the Packages search page." ); + hint( ( $printed ? "Or you" : "You" )." can try a different search on the Packages search page." ); } } -print_header; -print_msgs; -print_errors; -print_hints; -print_debug; -&print_results; -&printfooter; - -sub print_results { - return unless @results; - +print Packages::HTML::header( title => 'Package Search Results' , + lang => 'en', + title_tag => 'Debian Package Search Results', + print_title_above => 1, + print_search_field => 'packages', + search_field_values => { + keywords => $keyword_enc, + searchon => $searchon, + arch => $archs_enc, + suite => $suites_enc, + section => $sections_enc, + subword => $subword, + exact => $exact, + case => $case, + debug => $debug, + }, + ); +print_msgs(); +print_errors(); +print_hints(); +print_debug(); +if (@results) { my (%pkgs, %sect, %part, %desc, %binaries); - unless ($search_on_sources) { + unless ($opts{searchon} eq 'sourcenames') { foreach (@results) { my ($pkg_t, $suite, $arch, $section, $subsection, $priority, $version, $desc) = @$_; @@ -467,8 +315,8 @@ sub print_results { $desc{$pkg}{$suite}{$version} = $desc; } - if ($format eq 'html') { - my ($start, $end) = multipageheader( scalar keys %pkgs ); + if ($opts{format} eq 'html') { + my ($start, $end) = multipageheader( $input, scalar keys %pkgs, \%opts ); my $count = 0; foreach my $pkg (sort keys %pkgs) { @@ -507,11 +355,11 @@ sub print_results { $part{$pkg}{$suite}{source} = $section unless $section eq 'main'; - $binaries{$pkg}{$suite} = find_binaries( $pkg, $suite ); + $binaries{$pkg}{$suite} = find_binaries( $pkg, $suite, \%src2bin ); } - if ($format eq 'html') { - my ($start, $end) = multipageheader( scalar keys %pkgs ); + if ($opts{format} eq 'html') { + my ($start, $end) = multipageheader( $input, scalar keys %pkgs, \%opts ); my $count = 0; foreach my $pkg (sort keys %pkgs) { @@ -544,78 +392,16 @@ sub print_results { } } } - printindexline( scalar keys %pkgs ); -} - -exit; - -sub printindexline { - my $no_results = shift; - - my $index_line; - if ($no_results > $opts{number}) { - - $index_line = prevlink($input,\%params)." | ". - indexline( $input, \%params, $no_results)." | ". - nextlink($input,\%params, $no_results); - - print "

$index_line

"; - } -} - -sub multipageheader { - my $no_results = shift; - - my ($start, $end); - if ($opts{number} =~ /^all$/i) { - $start = 1; - $end = $no_results; - $opts{number} = $no_results; - } else { - $start = Packages::Search::start( \%params ); - $end = Packages::Search::end( \%params ); - if ($end > $no_results) { $end = $no_results; } - } - - print "

Found $no_results matching packages,"; - if ($end == $start) { - print " displaying package $end.

"; - } else { - print " displaying packages $start to $end.

"; - } - - printindexline( $no_results ); - - if ($no_results > 100) { - print "

Results per page: "; - my @resperpagelinks; - for (50, 100, 200) { - if ($opts{number} == $_) { - push @resperpagelinks, $_; - } else { - push @resperpagelinks, resperpagelink($input,\%params,$_); - } - } - if ($params{values}{number}{final} =~ /^all$/i) { - push @resperpagelinks, "all"; - } else { - push @resperpagelinks, resperpagelink($input, \%params,"all"); - } - print join( " | ", @resperpagelinks )."

"; - } - return ( $start, $end ); -} - -sub printfooter { - - my $pete = new Benchmark; - my $petd = timediff($pete, $pet0); - print "Total page evaluation took ".timestr($petd)."
" - if $debug_allowed; - - my $trailer = Packages::HTML::trailer( $ROOT ); - $trailer =~ s/LAST_MODIFIED_DATE/gmtime()/e; #FIXME - print $trailer; + printindexline( $input, scalar keys %pkgs, \%opts ); } +#print_results(\@results, \%opts) if @results;; +my $tet1 = new Benchmark; +my $tetd = timediff($tet1, $tet0); +print "Total page evaluation took ".timestr($petd)."
" + if $debug_allowed; + +my $trailer = Packages::HTML::trailer( $ROOT ); +$trailer =~ s/LAST_MODIFIED_DATE/gmtime()/e; #FIXME +print $trailer; # vim: ts=8 sw=4 diff --git a/conf/apache.conf b/conf/apache.conf index 0fce69b..f6e9e03 100644 --- a/conf/apache.conf +++ b/conf/apache.conf @@ -83,7 +83,22 @@ ErrorLog /var/log/apache/packages.debian.org-error.log CustomLog /var/log/apache/packages.debian.org-access.log combined - ScriptAlias /cgi-bin /org/packages.debian.org/cgi-bin/ +# ScriptAlias /cgi-bin /org/packages.debian.org/cgi-bin/ + + Alias /cgi-bin/ /org/packages.debian.org/cgi-bin/ + + PerlModule Apache::Registry + PerlTaintCheck On + + SetHandler perl-script + PerlRequire /org/packages.debian.org/bin/mod_perl-startup + PerlInitHandler Apache::Reload + PerlHandler Apache::Registry + Options +ExecCGI + PerlSendHeader On + allow from all + + ScriptAlias /cgi-old /org/packages.debian.net/cgi-bin/ RewriteEngine on diff --git a/config.sh b/config.sh index b1a039f..6059fa7 100644 --- a/config.sh +++ b/config.sh @@ -30,12 +30,20 @@ volatile_ftpsite=http://volatile.debian.net/debian-volatile amd64_ftpsite=http://amd64.debian.net/debian kfreebsd_ftpsite=http://kfreebsd-gnu.debian.net/debian +root="" +searchpage="http://packages.debian.net/" +searchcgi="/cgi-bin/search_packages.pl" +webmaster=webmaster@debian.org +contact=debian-www@lists.debian.org +home="http://www.debian.org" + # Architectures # polangs="de fi nl fr uk" ddtplangs="de cs da eo es fi fr hu it ja nl pl pt_BR pt_PT ru sk sv_SE uk" -parts="main contrib non-free" -dists="oldstable stable testing unstable" +sections="main contrib non-free" +suites="oldstable stable testing unstable" +architectures="alpha amd64 arm hppa hurd-i386 i386 ia64 kfreebsd-i386 m68k mips mipsel powerpc s390 sparc" arch_oldstable="alpha arm hppa i386 ia64 m68k mips mipsel powerpc s390 sparc" arch_stable="${arch_oldstable} amd64" arch_testing="${arch_stable}" diff --git a/lib/Packages/HTML.pm b/lib/Packages/HTML.pm index 65a13f4..873a7ea 100644 --- a/lib/Packages/HTML.pm +++ b/lib/Packages/HTML.pm @@ -22,24 +22,11 @@ our @EXPORT = qw( header title trailer file_changed time_stamp ds_begin ds_item ds_end note title marker pdesc pdeplegend pkg_list pmoreinfo ); -our $HOME = "http://www.debian.org"; -our $ROOT = "http://merkel.debian.org/~jeroen/pdo"; # <-- config.sh?! -our $CONTACT_MAIL = 'debian-www@lists.debian.org'; -our $WEBMASTER_MAIL = 'webmaster@debian.org'; -our $SEARCH_PAGE = "$ROOT/"; -our $SEARCH_CGI = "$ROOT/search"; -our $CGI_ROOT = "$ROOT/cgi-bin"; -our $CN_HELP_URL = "${HOME}/intro/cn"; +our ( $HOME, $ROOT, $CONTACT_MAIL, $WEBMASTER_MAIL, + $SEARCH_PAGE, $SEARCH_CGI, $SEARCH_URL, + $SRC_SEARCH_URL, $CONTENTS_SEARCH_CGI, + $CN_HELP_URL, $BUG_URL, $SRC_BUG_URL, $QA_URL ); our $CHANGELOG_URL = '/changelogs'; -our $COPYRIGHT_URL = '/changelogs'; -our $SEARCH_URL = "$ROOT/search/"; -our $SRC_SEARCH_URL = "$SEARCH_CGI?searchon=sourcenames&version=all&exact=1&keywords="; -our $BUG_URL = 'http://bugs.debian.org/'; -our $SRC_BUG_URL = 'http://bugs.debian.org/src:'; -our $QA_URL = 'http://packages.qa.debian.org/'; - - -my %img_trans = ( pt_BR => "pt", pt_PT => "pt", sv_SE => "sv" ); sub img { my ( $root, $url, $src, $alt, %attr ) = @_; @@ -197,7 +184,7 @@ sub pmoreinfo { $src_dir =~ s,pool/non-US,pool,o; $str .= "
".sprintf( gettext( "View the Debian changelog" ), "$CHANGELOG_URL/$src_dir/$src_basename/changelog" )."
\n"; - my $copyright_url = "$COPYRIGHT_URL/$src_dir/$src_basename/"; + my $copyright_url = "$CHANGELOG_URL/$src_dir/$src_basename/"; $copyright_url .= ( $is_source ? 'copyright' : "$name.copyright" ); $str .= sprintf( gettext( "View the copyright file" ), @@ -296,6 +283,7 @@ sub header { $search_in_header = <
+ @@ -327,8 +315,9 @@ MENU filelist => "", ); $checked_searchmode{$values{searchmode}} = "checked=\"checked\""; $search_in_header = < +
+ @@ -357,7 +346,6 @@ MENU my $KEYWORDS_LINE = ""; my $LANG = $params{lang}; - my $img_lang = $img_trans{$LANG} || $LANG; my $charset = get_charset($LANG); my $txt = < diff --git a/lib/Packages/Search.pm b/lib/Packages/Search.pm index ee3b7d2..ae8cee3 100644 --- a/lib/Packages/Search.pm +++ b/lib/Packages/Search.pm @@ -46,14 +46,19 @@ use warnings; use CGI qw( -oldstyle_urls ); use POSIX; use HTML::Entities; +use DB_File; use Deb::Versions; +use Packages::CGI; use Exporter; our @ISA = qw( Exporter ); our @EXPORT_OK = qw( nextlink prevlink indexline - resperpagelink ); + resperpagelink + read_entry read_src_entry find_binaries + do_names_search do_fulltext_search + printindexline multipageheader ); our %EXPORT_TAGS = ( all => [ @EXPORT_OK ] ); our $VERSION = 0.01; @@ -66,7 +71,7 @@ our %page_params = ( page => { default => DEFAULT_PAGE, number => { default => DEFAULT_RES_PER_PAGE, match => '(\d+)' } ); -our $debug = 0; +our $too_many_hits = 0; sub parse_params { my ( $cgi, $params_def, $opts ) = @_; @@ -74,7 +79,7 @@ sub parse_params { my %params_ret = ( values => {}, errors => {} ); my %params; if ($USE_PAGED_MODE) { - print "DEBUG: Use PAGED_MODE
" if $debug; + debug( "Use PAGED_MODE", 2 ); %params = %$params_def; foreach (keys %page_params) { delete $params{$_}; @@ -86,7 +91,7 @@ sub parse_params { foreach my $param ( keys %params ) { - print "

DEBUG: Param $param
" if $debug; + debug( "Param $param", 2 ); my $p_value_orig = $cgi->param($param); @@ -94,18 +99,17 @@ sub parse_params { && defined $params_def->{$param}{alias} && defined $cgi->param($params_def->{$param}{alias})) { $p_value_orig = $cgi->param($params_def->{$param}{alias}); - print "DEBUG: Used alias $params_def->{$param}{alias}
" - if $debug; + debug( "Used alias $params_def->{$param}{alias}", + 2 ); } my @p_value = ($p_value_orig); - print "DEBUG: Value (Orig) ".($p_value_orig||"")."
" if $debug; + debug( "Value (Orig) ".($p_value_orig||""), 2 ); if ($params_def->{$param}{array} && defined $p_value_orig) { @p_value = split /$params_def->{$param}{array}/, $p_value_orig; - print "DEBUG: Value (Array Split) ". - join('##',@p_value)."
" if $debug; + debug( "Value (Array Split) ". join('##',@p_value), 2 ); } if ($params_def->{$param}{match} && defined $p_value_orig) { @@ -115,8 +119,7 @@ sub parse_params { } @p_value = grep { defined $_ } @p_value; - print "DEBUG: Value (Match) ". - join('##',@p_value)."
" if $debug; + debug( "Value (Match) ". join('##',@p_value), 2 ); unless (@p_value) { if (defined $params{$param}{default}) { @@ -128,8 +131,7 @@ sub parse_params { } } - print "DEBUG: Value (Default) ". - join('##',@p_value)."
" if $debug; + debug( "Value (Default) ". join('##',@p_value), 2 ); my @p_value_no_replace = @p_value; if ($params{$param}{replace} && @p_value) { @@ -150,8 +152,7 @@ sub parse_params { } } - print "DEBUG: Value (Final) ". - join('##',@p_value)."
" if $debug; + debug( "Value (Final) ". join('##',@p_value), 2 ); if ($params_def->{$param}{array}) { $params_ret{values}{$param} = { @@ -196,9 +197,11 @@ sub start { sub end { my $params = shift; - my $page = $params->{values}{page}{final} + use Data::Dumper; + debug( "end: ".Dumper($params) ); + my $page = $params->{page} || DEFAULT_PAGE; - my $res_per_page = $params->{values}{number}{final} + my $res_per_page = $params->{number} || DEFAULT_RES_PER_PAGE; return $page * $res_per_page; @@ -208,9 +211,9 @@ sub indexline { my ($cgi, $params, $num_res) = @_; my $index_line = ""; - my $page = $params->{values}{page}{final} + my $page = $params->{page} || DEFAULT_PAGE; - my $res_per_page = $params->{values}{number}{final} + my $res_per_page = $params->{number} || DEFAULT_RES_PER_PAGE; my $numpages = ceil($num_res / $res_per_page); @@ -232,10 +235,10 @@ sub indexline { sub nextlink { my ($cgi, $params, $no_results ) = @_; - my $page = $params->{values}{page}{final} + my $page = $params->{page} || DEFAULT_PAGE; $page++; - my $res_per_page = $params->{values}{number}{final} + my $res_per_page = $params->{number} || DEFAULT_RES_PER_PAGE; if ((($page-1)*$res_per_page + 1) > $no_results) { @@ -249,14 +252,14 @@ sub nextlink { sub prevlink { my ($cgi, $params ) = @_; - my $page = $params->{values}{page}{final} + my $page = $params->{page} || DEFAULT_PAGE; $page--; if (!$page) { return "<<"; } - my $res_per_page = $params->{values}{number}{final} + my $res_per_page = $params->{number} || DEFAULT_RES_PER_PAGE; return "self_url). @@ -277,5 +280,186 @@ sub resperpagelink { "&page=$page&number=$res_per_page\">$res_per_page"; } +sub printindexline { + my ( $input, $no_results, $opts ) = @_; + + my $index_line; + if ($no_results > $opts->{number}) { + + $index_line = prevlink( $input, $opts)." | ". + indexline( $input, $opts, $no_results)." | ". + nextlink( $input, $opts, $no_results); + + print "

$index_line

"; + } +} + +sub multipageheader { + my ( $input, $no_results, $opts ) = @_; + + my ($start, $end); + if ($opts->{number} =~ /^all$/i) { + $start = 1; + $end = $no_results; + $opts->{number} = $no_results; + $opts->{number_all}++; + } else { + $start = Packages::Search::start( $opts ); + $end = Packages::Search::end( $opts ); + if ($end > $no_results) { $end = $no_results; } + } + + print "

Found $no_results matching packages,"; + if ($end == $start) { + print " displaying package $end.

"; + } else { + print " displaying packages $start to $end.

"; + } + + printindexline( $input, $no_results, $opts ); + + if ($no_results > 100) { + print "

Results per page: "; + my @resperpagelinks; + for (50, 100, 200) { + if ($opts->{number} == $_) { + push @resperpagelinks, $_; + } else { + push @resperpagelinks, resperpagelink($input,$opts,$_); + } + } + if ($opts->{number_all}) { + push @resperpagelinks, "all"; + } else { + push @resperpagelinks, resperpagelink($input, $opts, "all"); + } + print join( " | ", @resperpagelinks )."

"; + } + return ( $start, $end ); +} + +sub read_entry { + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 7 ); + debug( "Considering entry ".join( ':', @data), 2); + if ($opts->{h_suites}{$data[0]} + && ($opts->{h_archs}{$data[1]} || $data[1] eq 'all') + && $opts->{h_sections}{$data[2]}) { + debug( "Using entry ".join( ':', @data), 2); + push @$results, [ $key, @data ]; + } + } +} +sub read_src_entry { + my ($hash, $key, $results, $opts) = @_; + my $result = $hash->{$key} || ''; + foreach (split /\000/, $result) { + my @data = split ( /\s/, $_, 5 ); + debug( "Considering entry ".join( ':', @data), 2); + if ($opts->{h_suites}{$data[0]} && $opts->{h_sections}{$data[1]}) { + debug( "Using entry ".join( ':', @data), 2); + push @$results, [ $key, @data ]; + } + } +} +sub do_names_search { + my ($keyword, $packages, $postfixes, $read_entry, $opts) = @_; + my @results; + + $keyword = lc $keyword unless $opts->{case_bool}; + + if ($opts->{exact}) { + &$read_entry( $packages, $keyword, \@results, $opts ); + } else { + my ($key, $prefixes) = ($keyword, ''); + my %pkgs; + $postfixes->seq( $key, $prefixes, R_CURSOR ); + while (index($key, $keyword) >= 0) { + if ($prefixes =~ /^\001(\d+)/o) { + $too_many_hits += $1; + } else { + foreach (split /\000/o, $prefixes) { + $_ = '' if $_ eq '^'; + debug( "add word $_$key", 2); + $pkgs{$_.$key}++; + } + } + last if $postfixes->seq( $key, $prefixes, R_NEXT ) != 0; + last if $too_many_hits or keys %pkgs >= 100; + } + + my $no_results = keys %pkgs; + if ($too_many_hits || ($no_results >= 100)) { + $too_many_hits += $no_results; + %pkgs = ( $keyword => 1 ); + } + foreach my $pkg (sort keys %pkgs) { + &$read_entry( $packages, $pkg, \@results, $opts ); + } + } + return \@results; +} +sub do_fulltext_search { + my ($keyword, $file, $did2pkg, $packages, $read_entry, $opts) = @_; + my @results; + + my @lines; + my $regex; + if ($opts->{case_bool}) { + if ($opts->{exact}) { + $regex = qr/\b\Q$keyword\E\b/o; + } else { + $regex = qr/\Q$keyword\E/o; + } + } else { + if ($opts->{exact}) { + $regex = qr/\b\Q$keyword\E\b/io; + } else { + $regex = qr/\Q$keyword\E/io; + } + } + + open DESC, '<', "$file" + or die "couldn't open $file: $!"; + while () { + $_ =~ $regex or next; + debug( "Matched line $.", 2); + push @lines, $.; + } + close DESC; + + my %tmp_results; + foreach my $l (@lines) { + my $result = $did2pkg->{$l}; + foreach (split /\000/o, $result) { + my @data = split /\s/, $_, 3; + next unless $opts->{h_archs}{$data[2]}; + $tmp_results{$data[0]}++; + } + } + foreach my $pkg (keys %tmp_results) { + &$read_entry( $packages, $pkg, \@results, $opts ); + } + return \@results; +} + +sub find_binaries { + my ($pkg, $suite, $src2bin) = @_; + + my $bins = $src2bin->{$pkg} || ''; + my %bins; + foreach (split /\000/o, $bins) { + my @data = split /\s/, $_, 4; + + if ($data[0] eq $suite) { + $bins{$data[1]}++; + } + } + + return [ keys %bins ]; +} + 1; -- 2.39.2