X-Git-Url: https://git.deb.at/?p=deb%2Fpackages.git;a=blobdiff_plain;f=lib%2FPackages%2FDoSearch.pm;h=efefa975ad6ca5848c82ca94aeaacfb45201650b;hp=2a59937b84423ad9ec04573417fc2150053f6865;hb=0f318fa9ad9d473b543a48f46a7714a11283d300;hpb=078ddbf7006d957bbef2daa9735ec9a8bf3d8fcd diff --git a/lib/Packages/DoSearch.pm b/lib/Packages/DoSearch.pm index 2a59937..efefa97 100644 --- a/lib/Packages/DoSearch.pm +++ b/lib/Packages/DoSearch.pm @@ -5,45 +5,38 @@ use warnings; use Benchmark ':hireswallclock'; use DB_File; -use URI::Escape; -use HTML::Entities; use Exporter; our @ISA = qw( Exporter ); our @EXPORT = qw( do_search ); use Deb::Versions; -use Packages::I18N::Locale; use Packages::Search qw( :all ); -use Packages::CGI qw( :DEFAULT msg error ); +use Packages::CGI qw( :DEFAULT ); use Packages::DB; -use Packages::Config qw( $DBDIR @SUITES @ARCHIVES $ROOT ); +use Packages::Config qw( $DBDIR @SUITES @ARCHIVES @ARCHITECTURES $ROOT ); sub do_search { - my ($params, $opts, $html_header, $page_content) = @_; + my ($params, $opts, $page_content) = @_; + my $cat = $opts->{cat}; $Params::Search::too_many_hits = 0; if ($params->{errors}{keywords}) { - fatal_error( _g( "keyword not valid or missing" ) ); + fatal_error( $cat->g( "keyword not valid or missing" ) ); $opts->{keywords} = []; } elsif (grep { length($_) < 2 } @{$opts->{keywords}}) { - fatal_error( _g( "keyword too short (keywords need to have at least two characters)" ) ); + fatal_error( $cat->g( "keyword too short (keywords need to have at least two characters)" ) ); } my @keywords = @{$opts->{keywords}}; my $searchon = $opts->{searchon}; + $page_content->{search_keywords} = $opts->{keywords}; + $page_content->{all_architectures} = \@ARCHITECTURES; + $page_content->{all_suites} = \@SUITES; + $page_content->{search_architectures} = $opts->{arch}; + $page_content->{search_suites} = $opts->{suite}; + $page_content->{sections} = $opts->{section}; - # for URL construction - my $keyword_esc = uri_escape( "@keywords" ); - $opts->{keywords_esc} = $keyword_esc; - - # for output - my $keyword_enc = encode_entities "@keywords" || ''; - my $searchon_enc = encode_entities $searchon; - my $suites_enc = encode_entities( join( ', ', @{$params->{values}{suite}{no_replace}} ) ); - my $sections_enc = encode_entities( join( ', ', @{$params->{values}{section}{no_replace}} ) ); - my $archs_enc = encode_entities( join( ', ', @{$params->{values}{arch}{no_replace}} ) ); - my $st0 = new Benchmark; my (@results, @non_results); @@ -63,111 +56,67 @@ sub do_search { do_names_search( [ @keywords ], \%packages, $p_obj, \&read_entry_all, $opts, \@results, \@non_results ); - do_fulltext_search( [ @keywords ], "$DBDIR/descriptions.txt", + my $fts1 = new Benchmark; + do_xapian_search( [ @keywords ], "$DBDIR/xapian/", \%did2pkg, \%packages, \&read_entry_all, $opts, \@results, \@non_results ); + my $fts2 = new Benchmark; + my $fts_xapian = timediff($fts2,$fts1); + debug( "Fulltext search took ".timestr($fts_xapian) ) + if DEBUG; } } - + # use Data::Dumper; # debug( join( "", Dumper( \@results, \@non_results )) ) if DEBUG; my $st1 = new Benchmark; my $std = timediff($st1, $st0); debug( "Search took ".timestr($std) ) if DEBUG; - - my $suite_wording = $suites_enc =~ /^(default|all)$/ ? _g("all suites") - : sprintf(_g("suite(s) %s", $suites_enc) ); - my $section_wording = $sections_enc eq 'all' ? _g("all sections") - : sprintf(_g("section(s) %s", $sections_enc) ); - my $arch_wording = $archs_enc eq 'any' ? _g("all architectures") - : sprintf(_g("architecture(s) %s", $archs_enc) ); - if ($searchon eq "names") { - my $source_wording = $opts->{source} ? _g("source packages") : _g("packages"); - # sorry to all translators for that one... (patches welcome) - msg( sprintf( _g( "You have searched for %s that names contain %s in %s, %s, and %s." ), - $source_wording, $keyword_enc, - $suite_wording, $section_wording, $arch_wording ) ); - } else { - my $exact_wording = $opts->{exact} ? "" : _g(" (including subword matching)"); - msg( sprintf( _g( "You have searched for %s in packages names and descriptions in %s, %s, and %s%s." ), - $keyword_enc, - $suite_wording, $section_wording, $arch_wording, - $exact_wording ) ); - } - - if ($Packages::Search::too_many_hits) { - error( sprintf( _g( "Your search was too wide so we will only display exact matches. At least %s results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ), $Packages::Search::too_many_hits ) ); - } - - if (!@Packages::CGI::fatal_errors && !@results) { - if ($searchon eq "names") { - unless (@non_results) { - error( _g( "Can't find that package." ) ); - } else { -# hint( _g( "Can't find that package." )." ". -# sprintf( _g( '%s'. -# " results have not been displayed due to the". -# " search parameters." ), "$SEARCH_URL/$keyword_esc" , -# $#non_results+1 ) ); - } - - } else { - if (($suites_enc eq 'all') - && ($archs_enc eq 'any') - && ($sections_enc eq 'all')) { - error( _g( "Can't find that string." ) ); - } else { - error( sprintf( _g( "Can't find that string, at least not in that suite (%s, section %s) and on that architecture (%s)." ), - $suites_enc, $sections_enc, $archs_enc ) ); - } - - if ($opts->{exact}) { - hint( sprintf( _g( 'You have searched only for words exactly matching your keywords. You can try to search allowing subword matching.' ), - encode_entities(make_search_url('',"keywords=$keyword_esc",{exact => 0})) ) ); - } - } -# hint( sprintf( _g( 'You can try a different search on the Packages search page.' ), "$SEARCH_PAGE#search_packages" ) ); - - } - $page_content->{make_url} = sub { return &Packages::CGI::make_url(@_) }; - $page_content->{make_search_url} = sub { return &Packages::CGI::make_search_url(@_) }; - - $page_content->{search_field_values} = { - keywords => $keyword_enc, - searchon => $opts->{searchon_form}, - arch => $archs_enc, - suite => $suites_enc, - section => $sections_enc, - exact => $opts->{exact}, - debug => $opts->{debug}, - }; + $page_content->{too_many_hits} = $Packages::Search::too_many_hits; + #FIXME: non_results can't be compared to results since it is + # not normalized to unique packages + $page_content->{non_results} = scalar @non_results; if (@results) { my (%pkgs, %subsect, %sect, %archives, %desc, %binaries, %provided_by); + my %sort_by_relevance; + for (1 ... scalar @results) { +# debug("$results[$_][0] => $_", 4) if DEBUG; + $sort_by_relevance{$results[$_-1][0]} = $_; + } +# use Data::Dumper; +# debug( "sort_by_relevance=".Dumper(\%sort_by_relevance), 4); + unless ($opts->{source}) { foreach (@results) { my ($pkg_t, $archive, $suite, $arch, $section, $subsection, - $priority, $version, $desc) = @$_; - + $priority, $version, $desc_md5, $desc) = @$_; + my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint if ($arch ne 'virtual') { $pkgs{$pkg}{$suite}{$version}{$arch} = 1; $subsect{$pkg}{$suite}{$version} = $subsection; $sect{$pkg}{$suite}{$version} = $section; $archives{$pkg}{$suite}{$version} ||= $archive; - - $desc{$pkg}{$suite}{$version} = $desc; + + $desc{$pkg}{$suite}{$version} = [ $desc_md5, $desc ]; } else { $provided_by{$pkg}{$suite} = [ split /\s+/, $desc ]; } } my %uniq_pkgs = map { $_ => 1 } (keys %pkgs, keys %provided_by); - my @pkgs = sort keys %uniq_pkgs; - process_packages( $page_content, 'packages', \%pkgs, \@pkgs, $opts, \@keywords, + my @pkgs; + if ($searchon eq 'names') { + @pkgs = sort keys %uniq_pkgs; + } else { + @pkgs = sort { $sort_by_relevance{$a} <=> $sort_by_relevance{$b} } keys %uniq_pkgs; + } + process_packages( $page_content, 'packages', \%pkgs, \@pkgs, + $opts, \@keywords, \&process_package, \%provided_by, \%archives, \%sect, \%subsect, \%desc ); @@ -176,14 +125,13 @@ sub do_search { foreach (@results) { my ($pkg, $archive, $suite, $section, $subsection, $priority, $version) = @$_; - + my $real_archive = ''; - if ($archive =~ /^(security|non-US)$/) { + if ($archive eq 'security') { $real_archive = $archive; $archive = 'us'; } - if (($real_archive eq $archive) && - $pkgs{$pkg}{$suite}{$archive} && + if ($pkgs{$pkg}{$suite}{$archive} && (version_cmp( $pkgs{$pkg}{$suite}{$archive}, $version ) >= 0)) { next; } @@ -198,7 +146,8 @@ sub do_search { } my @pkgs = sort keys %pkgs; - process_packages( $page_content, 'src_packages', \%pkgs, \@pkgs, $opts, \@keywords, + process_packages( $page_content, 'src_packages', \%pkgs, \@pkgs, + $opts, \@keywords, \&process_src_package, \%archives, \%sect, \%subsect, \%binaries ); } # else unless $opts->{source} @@ -217,20 +166,21 @@ sub process_packages { my $have_exact; if ($keyword && grep { $_ eq $keyword } @$pkgs_list) { $have_exact = 1; - $categories[0]{name} = _g( "Exact hits" ); + $categories[0]{name} = $opts->{cat}->g( "Exact hits" ); - $categories[0]{$target} = [ &$print_func( $keyword, $pkgs->{$keyword}||{}, - map { $_->{$keyword}||{} } @func_args ) ]; + $categories[0]{$target} = [ &$print_func( $opts, $keyword, + $pkgs->{$keyword}||{}, + map { $_->{$keyword}||{} } @func_args ) ]; @$pkgs_list = grep { $_ ne $keyword } @$pkgs_list; } if (@$pkgs_list && (($opts->{searchon} ne 'names') || !$opts->{exact})) { my %cat; - $cat{name} = _g( 'Other hits' ) if $have_exact; + $cat{name} = $opts->{cat}->g( 'Other hits' ) if $have_exact; $cat{packages} = []; foreach my $pkg (@$pkgs_list) { - push @{$cat{$target}}, &$print_func( $pkg, $pkgs->{$pkg}||{}, + push @{$cat{$target}}, &$print_func( $opts, $pkg, $pkgs->{$pkg}||{}, map { $_->{$pkg}||{} } @func_args ); } push @categories, \%cat; @@ -242,7 +192,8 @@ sub process_packages { } sub process_package { - my ($pkg, $pkgs, $provided_by, $archives, $sect, $subsect, $desc) = @_; + my ($opts, $pkg, $pkgs, $provided_by, + $archives, $sect, $subsect, $desc) = @_; my %pkg = ( pkg => $pkg, suites => [] ); @@ -254,9 +205,22 @@ sub process_package { my @versions = version_sort keys %{$pkgs->{$suite}}; $suite{section} = $sect->{$suite}{$versions[0]}; $suite{subsection} = $subsect->{$suite}{$versions[0]}; - $suite{desc} = $desc->{$suite}{$versions[0]}; + my $desc_md5 = $desc->{$suite}{$versions[0]}[0]; + $suite{desc} = $desc->{$suite}{$versions[0]}[1]; $suite{versions} = []; - + + my $trans_desc = $desctrans{$desc_md5}; + my %sdescs; + if ($trans_desc) { + my %trans_desc = split /\000|\001/, $trans_desc; + while (my ($l, $d) = each %trans_desc) { + $d =~ s/\n.*//os; + + $sdescs{$l} = $d; + } + $suite{trans_desc} = \%sdescs; + } + foreach my $v (@versions) { my %version; $version{version} = $v; @@ -271,7 +235,7 @@ sub process_package { $suite{providers} = $p; } } elsif (my $p = $provided_by->{$suite}) { - $suite{desc} = _g('Virtual package'); + $suite{desc} = $opts->{cat}->g('Virtual package'); $suite{providers} = $p; } push @{$pkg{suites}}, \%suite if $suite{versions} || $suite{providers}; @@ -281,7 +245,7 @@ sub process_package { } sub process_src_package { - my ($pkg, $pkgs, $archives, $sect, $subsect, $binaries) = @_; + my ($opts, $pkg, $pkgs, $archives, $sect, $subsect, $binaries) = @_; my %pkg = ( pkg => $pkg, origins => [] );