X-Git-Url: https://git.deb.at/?p=deb%2Fpackages.git;a=blobdiff_plain;f=lib%2FPackages%2FDoSearch.pm;h=c1b6dfdd01cd8c51420a530e68ff895eb9bab91b;hp=3fc84721889e42243518130b63e2ca23a8091839;hb=08aa87adaf6c59131d01f8a4a078dc4e78475788;hpb=ef769eaccb9ebf7f49b8c3240c9c4d6980072ee5 diff --git a/lib/Packages/DoSearch.pm b/lib/Packages/DoSearch.pm index 3fc8472..c1b6dfd 100644 --- a/lib/Packages/DoSearch.pm +++ b/lib/Packages/DoSearch.pm @@ -3,7 +3,7 @@ package Packages::DoSearch; use strict; use warnings; -use Benchmark; +use Benchmark ':hireswallclock'; use DB_File; use URI::Escape; use HTML::Entities; @@ -12,39 +12,28 @@ our @ISA = qw( Exporter ); our @EXPORT = qw( do_search ); use Deb::Versions; +use Packages::I18N::Locale; use Packages::Search qw( :all ); -use Packages::CGI; +use Packages::CGI qw( :DEFAULT ); use Packages::DB; -use Packages::Config qw( $DBDIR $SEARCH_URL $SEARCH_CGI $SEARCH_PAGE - @SUITES @ARCHIVES $ROOT ); +use Packages::Config qw( $DBDIR @SUITES @ARCHIVES $ROOT ); sub do_search { - my ($params, $opts, $html_header, $menu, $page_content) = @_; + my ($params, $opts, $page_content) = @_; + + $Params::Search::too_many_hits = 0; if ($params->{errors}{keywords}) { - fatal_error( "keyword not valid or missing" ); - } elsif (length($opts->{keywords}) < 2) { - fatal_error( "keyword too short (keywords need to have at least two characters)" ); + fatal_error( _g( "keyword not valid or missing" ) ); + $opts->{keywords} = []; + } elsif (grep { length($_) < 2 } @{$opts->{keywords}}) { + fatal_error( _g( "keyword too short (keywords need to have at least two characters)" ) ); } - $$menu = ""; - - my $keyword = $opts->{keywords}; + my @keywords = @{$opts->{keywords}}; my $searchon = $opts->{searchon}; + $page_content->{search_keywords} = \@keywords; - # for URL construction - my $keyword_esc = uri_escape( $keyword ); - my $suites_param = join ',', @{$params->{values}{suite}{no_replace}}; - my $sections_param = join ',', @{$params->{values}{section}{no_replace}}; - my $archs_param = join ',', @{$params->{values}{arch}{no_replace}}; - - # for output - my $keyword_enc = encode_entities $keyword || ''; - my $searchon_enc = encode_entities $searchon; - my $suites_enc = encode_entities( join( ', ', @{$params->{values}{suite}{no_replace}} ) ); - my $sections_enc = encode_entities( join( ', ', @{$params->{values}{section}{no_replace}} ) ); - my $archs_enc = encode_entities( join( ', ', @{$params->{values}{arch}{no_replace}} ) ); - my $st0 = new Benchmark; my (@results, @non_results); @@ -52,143 +41,89 @@ sub do_search { if ($searchon eq 'names') { if ($opts->{source}) { - do_names_search( $keyword, \%sources, $sp_obj, + do_names_search( [ @keywords ], \%sources, $sp_obj, \&read_src_entry_all, $opts, \@results, \@non_results ); } else { - do_names_search( $keyword, \%packages, $p_obj, + do_names_search( [ @keywords ], \%packages, $p_obj, \&read_entry_all, $opts, \@results, \@non_results ); } -# } elsif ($searchon eq 'contents') { -# require "./search_contents.pl"; -# &contents($input); } else { - do_names_search( $keyword, \%packages, $p_obj, + do_names_search( [ @keywords ], \%packages, $p_obj, \&read_entry_all, $opts, \@results, \@non_results ); - do_fulltext_search( $keyword, "$DBDIR/descriptions.txt", + my $fts1 = new Benchmark; + do_xapian_search( [ @keywords ], "$DBDIR/xapian/", \%did2pkg, \%packages, \&read_entry_all, $opts, \@results, \@non_results ); + my $fts2 = new Benchmark; + my $fts_xapian = timediff($fts2,$fts1); + debug( "Fulltext search took ".timestr($fts_xapian) ) + if DEBUG; } } - + # use Data::Dumper; -# debug( join( "", Dumper( \@results, \@non_results )) ); +# debug( join( "", Dumper( \@results, \@non_results )) ) if DEBUG; my $st1 = new Benchmark; my $std = timediff($st1, $st0); - debug( "Search took ".timestr($std) ); - - my $suite_wording = $suites_enc eq "all" ? "all suites" - : "suite(s) $suites_enc"; - my $section_wording = $sections_enc eq 'all' ? "all sections" - : "section(s) $sections_enc"; - my $arch_wording = $archs_enc eq 'any' ? "all architectures" - : "architecture(s) $archs_enc"; - if ($searchon eq "names") { - my $source_wording = $opts->{source} ? "source " : ""; - my $exact_wording = $opts->{exact} ? "named" : "that names contain"; - msg( "You have searched for ${source_wording}packages $exact_wording $keyword_enc in $suite_wording, $section_wording, and $arch_wording." ); - } else { - my $exact_wording = $opts->{exact} ? "" : " (including subword matching)"; - msg( "You have searched for $keyword_enc in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording." ); - } + debug( "Search took ".timestr($std) ) if DEBUG; - if ($Packages::Search::too_many_hits) { - error( "Your search was too wide so we will only display exact matches. At least $Packages::Search::too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ); - } - - if (!@Packages::CGI::fatal_errors && !@results) { - my $printed = 0; - if ($searchon eq "names") { - unless (@non_results) { - error( "Can't find that package." ); - } else { - hint( "Can't find that package. ". - "". - ($#non_results+1)."". - " results have not been displayed due to the". - " search parameters." ); - } - - } else { - if (($suites_enc eq 'all') - && ($archs_enc eq 'any') - && ($sections_enc eq 'all')) { - error( "Can't find that string." ); - } else { - error( "Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc)." ); - } - - if ($opts->{exact}) { - $printed++; - hint( "You have searched only for words exactly matching your keywords. You can try to search allowing subword matching." ); - } - } - hint( ( $printed ? "Or you" : "You" )." can try a different search on the Packages search page." ); - - } + $page_content->{too_many_hits} = $Packages::Search::too_many_hits; + #FIXME: non_results can't be compared to results since it is + # not normalized to unique packages + $page_content->{non_results} = scalar @non_results; - %$html_header = ( title => 'Package Search Results' , - lang => 'en', - title_tag => 'Debian Package Search Results', - print_title => 1, - print_search_field => 'packages', - search_field_values => { - keywords => $keyword_enc, - searchon => $opts->{searchon_form}, - arch => $archs_enc, - suite => $suites_enc, - section => $sections_enc, - exact => $opts->{exact}, - debug => $opts->{debug}, - }, - ); - - $$page_content = ''; if (@results) { my (%pkgs, %subsect, %sect, %archives, %desc, %binaries, %provided_by); + my %sort_by_relevance; + for (1 ... scalar @results) { +# debug("$results[$_][0] => $_", 4) if DEBUG; + $sort_by_relevance{$results[$_-1][0]} = $_; + } +# use Data::Dumper; +# debug( "sort_by_relevance=".Dumper(\%sort_by_relevance), 4); + unless ($opts->{source}) { foreach (@results) { my ($pkg_t, $archive, $suite, $arch, $section, $subsection, $priority, $version, $desc) = @$_; - + my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint if ($arch ne 'virtual') { - my $real_archive; - if ($archive =~ /^(security|non-US)$/) { - $real_archive = $archive; - $archive = 'us'; - } - - $pkgs{$pkg}{$suite}{$archive}{$version}{$arch} = 1; - $subsect{$pkg}{$suite}{$archive}{$version} = $subsection; - $sect{$pkg}{$suite}{$archive}{$version} = $section - unless $section eq 'main'; - $archives{$pkg}{$suite}{$archive}{$version} = $real_archive - if $real_archive; - - $desc{$pkg}{$suite}{$archive}{$version} = $desc; + $pkgs{$pkg}{$suite}{$version}{$arch} = 1; + $subsect{$pkg}{$suite}{$version} = $subsection; + $sect{$pkg}{$suite}{$version} = $section; + $archives{$pkg}{$suite}{$version} ||= $archive; + + $desc{$pkg}{$suite}{$version} = $desc; } else { - $provided_by{$pkg}{$suite}{$archive} = [ split /\s+/, $desc ]; + $provided_by{$pkg}{$suite} = [ split /\s+/, $desc ]; } } - my @pkgs = sort(keys %pkgs, keys %provided_by); - $$page_content .= print_packages( \%pkgs, \@pkgs, $opts, $keyword, - \&print_package, \%provided_by, - \%archives, \%sect, \%subsect, - \%desc ); + my %uniq_pkgs = map { $_ => 1 } (keys %pkgs, keys %provided_by); + my @pkgs; + if ($searchon eq 'names') { + @pkgs = sort keys %uniq_pkgs; + } else { + @pkgs = sort { $sort_by_relevance{$a} <=> $sort_by_relevance{$b} } keys %uniq_pkgs; + } + process_packages( $page_content, 'packages', \%pkgs, \@pkgs, $opts, \@keywords, + \&process_package, \%provided_by, + \%archives, \%sect, \%subsect, + \%desc ); } else { # unless $opts->{source} foreach (@results) { my ($pkg, $archive, $suite, $section, $subsection, $priority, $version) = @$_; - + my $real_archive = ''; - if ($archive =~ /^(security|non-US)$/) { + if ($archive eq 'security') { $real_archive = $archive; $archive = 'us'; } @@ -208,131 +143,112 @@ sub do_search { } my @pkgs = sort keys %pkgs; - $$page_content .= print_packages( \%pkgs, \@pkgs, $opts, $keyword, - \&print_src_package, \%archives, - \%sect, \%subsect, \%binaries ); + process_packages( $page_content, 'src_packages', \%pkgs, \@pkgs, $opts, \@keywords, + \&process_src_package, \%archives, + \%sect, \%subsect, \%binaries ); } # else unless $opts->{source} } # if @results } # sub do_search -sub print_packages { - my ($pkgs, $pkgs_list, $opts, $keyword, $print_func, @func_args) = @_; +sub process_packages { + my ($content, $target, $pkgs, $pkgs_list, $opts, $keywords, $print_func, @func_args) = @_; + + my @categories; + $content->{results} = scalar @$pkgs_list; - #my ($start, $end) = multipageheader( $input, scalar @pkgs, \%opts ); - my $str .= "

Found ".(scalar @$pkgs_list)." matching packages."; - #my $count = 0; + my $keyword; + $keyword = $keywords->[0] if @$keywords == 1; my $have_exact; - if (grep { $_ eq $keyword } @$pkgs_list) { + if ($keyword && grep { $_ eq $keyword } @$pkgs_list) { $have_exact = 1; - $str .= '

Exact hits

'; - $str .= &$print_func( $keyword, $pkgs->{$keyword}||{}, - map { $_->{$keyword}||{} } @func_args ); + $categories[0]{name} = _g( "Exact hits" ); + + $categories[0]{$target} = [ &$print_func( $keyword, $pkgs->{$keyword}||{}, + map { $_->{$keyword}||{} } @func_args ) ]; @$pkgs_list = grep { $_ ne $keyword } @$pkgs_list; } if (@$pkgs_list && (($opts->{searchon} ne 'names') || !$opts->{exact})) { - $str .= '

Other hits

' - if $have_exact; + my %cat; + $cat{name} = _g( 'Other hits' ) if $have_exact; + $cat{packages} = []; foreach my $pkg (@$pkgs_list) { - #$count++; - #next if $count < $start or $count > $end; - $str .= &$print_func( $pkg, $pkgs->{$pkg}||{}, - map { $_->{$pkg}||{} } @func_args ); + push @{$cat{$target}}, &$print_func( $pkg, $pkgs->{$pkg}||{}, + map { $_->{$pkg}||{} } @func_args ); } + push @categories, \%cat; } elsif (@$pkgs_list) { - $str .= "

". - ($#{$pkgs_list}+1)." results have not been displayed because you requested only exact matches.

"; + $content->{skipped} = scalar @$pkgs_list; } - return $str; + $content->{categories} = \@categories; } -sub print_package { +sub process_package { my ($pkg, $pkgs, $provided_by, $archives, $sect, $subsect, $desc) = @_; - my $str = sprintf "

Package %s

\n", $pkg; - return $str; - $str .= "\n"; - return $str; + + return \%pkg; } -sub print_src_package { +sub process_src_package { my ($pkg, $pkgs, $archives, $sect, $subsect, $binaries) = @_; - my $str = sprintf "

Source package %s

\n", $pkg; - $str .= "\n"; - return $str; + + return \%pkg; } 1;