a94dd7bc33e46fd54415cbc50011d9e45b7128b4
[deb/packages.git] / lib / Packages / DoSearch.pm
1 package Packages::DoSearch;
2
3 use strict;
4 use warnings;
5
6 use Benchmark ':hireswallclock';
7 use DB_File;
8 use URI::Escape;
9 use HTML::Entities;
10 use Exporter;
11 our @ISA = qw( Exporter );
12 our @EXPORT = qw( do_search );
13
14 use Deb::Versions;
15 use Packages::I18N::Locale;
16 use Packages::Search qw( :all );
17 use Packages::CGI qw( :DEFAULT );
18 use Packages::DB;
19 use Packages::Config qw( $DBDIR @SUITES @ARCHIVES $ROOT );
20
# Run a package search and store the outcome in $page_content.
#
# Parameters:
#   $params       - request parameters; only $params->{errors}{keywords}
#                   is inspected here
#   $opts         - search options; this sub reads {keywords} (array ref),
#                   {searchon} and {source}, and hands the whole hash on to
#                   the search routines and to process_packages
#   $html_header  - not used by this sub
#   $page_content - hash ref that receives search_keywords, too_many_hits
#                   and non_results, plus whatever process_packages stores
#                   (results, categories, skipped)
#
# Validation problems are reported through fatal_error(); when any fatal
# error has been recorded no search is executed at all.
sub do_search {
    my ($params, $opts, $html_header, $page_content) = @_;

    # Reset the very same package variable that is copied into
    # $page_content further down, so that a value left over from an
    # earlier call in the same interpreter cannot leak into this result.
    $Packages::Search::too_many_hits = 0;

    if ($params->{errors}{keywords}) {
        fatal_error( _g( "keyword not valid or missing" ) );
        $opts->{keywords} = [];
    } elsif (grep { length($_) < 2 } @{$opts->{keywords}}) {
        fatal_error( _g( "keyword too short (keywords need to have at least two characters)" ) );
    }

    my @keywords = @{$opts->{keywords}};
    my $searchon = $opts->{searchon};
    $page_content->{search_keywords} = \@keywords;

    my $st0 = Benchmark->new;
    my (@results, @non_results);

    unless (@Packages::CGI::fatal_errors) {

        if ($searchon eq 'names' && $opts->{source}) {
            # Name search restricted to source packages.
            do_names_search( [ @keywords ], \%sources, $sp_obj,
                             \&read_src_entry_all, $opts,
                             \@results, \@non_results );
        } else {
            # Every other mode starts with a name search on binary packages.
            do_names_search( [ @keywords ], \%packages, $p_obj,
                             \&read_entry_all, $opts,
                             \@results, \@non_results );

            if ($searchon ne 'names') {
                # Non-name searches additionally query the Xapian index.
                # (An older, grep based do_fulltext_search() over
                # "$DBDIR/descriptions.txt" used to be called here.)
                my $fts1 = Benchmark->new;
                do_xapian_search( [ @keywords ], "$DBDIR/xapian/",
                                  \%did2pkg, \%packages,
                                  \&read_entry_all, $opts,
                                  \@results, \@non_results );
                my $fts2 = Benchmark->new;
                my $fts_xapian = timediff($fts2, $fts1);
                debug( "Fulltext search took ".timestr($fts_xapian)." (Xapian)" )
                    if DEBUG;
            }
        }
    }

    my $st1 = Benchmark->new;
    my $std = timediff($st1, $st0);
    debug( "Search took ".timestr($std) ) if DEBUG;

    $page_content->{too_many_hits} = $Packages::Search::too_many_hits;
    #FIXME: non_results can't be compared to results since it is
    # not normalized to unique packages
    $page_content->{non_results} = scalar @non_results;

    if (@results) {
        my (%pkgs, %subsect, %sect, %archives, %desc, %binaries, %provided_by);

        if ($opts->{source}) {
            foreach my $hit (@results) {
                my ($pkg, $archive, $suite, $section, $subsection, $priority,
                    $version) = @$hit;

                # security and non-US hits are filed under the 'us' archive;
                # the original archive name is remembered separately.
                my $real_archive = '';
                if ($archive =~ /^(security|non-US)$/) {
                    $real_archive = $archive;
                    $archive = 'us';
                }
                # NOTE(review): $real_archive is either '' or one of
                # security/non-US while $archive has just been set to 'us'
                # in that case, so this condition looks like it can only
                # hold for an empty archive name -- confirm the intended
                # de-duplication rule. Behaviour is kept as it was.
                if (($real_archive eq $archive) &&
                    $pkgs{$pkg}{$suite}{$archive} &&
                    (version_cmp( $pkgs{$pkg}{$suite}{$archive}, $version ) >= 0)) {
                    next;
                }
                $pkgs{$pkg}{$suite}{$archive} = $version;
                $subsect{$pkg}{$suite}{$archive}{source} = $subsection;
                $sect{$pkg}{$suite}{$archive}{source} = $section
                    unless $section eq 'main';
                $archives{$pkg}{$suite}{$archive}{source} = $real_archive
                    if $real_archive;

                $binaries{$pkg}{$suite}{$archive} = find_binaries( $pkg, $archive, $suite, \%src2bin );
            }

            my @pkgs = sort keys %pkgs;
            process_packages( $page_content, 'src_packages', \%pkgs, \@pkgs, $opts, \@keywords,
                              \&process_src_package, \%archives,
                              \%sect, \%subsect, \%binaries );
        } else {
            foreach my $hit (@results) {
                my ($pkg_t, $archive, $suite, $arch, $section, $subsection,
                    $priority, $version, $desc) = @$hit;

                my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint
                if ($arch ne 'virtual') {
                    $pkgs{$pkg}{$suite}{$version}{$arch} = 1;
                    $subsect{$pkg}{$suite}{$version} = $subsection;
                    $sect{$pkg}{$suite}{$version} = $section;
                    # The first archive seen for a version wins.
                    $archives{$pkg}{$suite}{$version} ||= $archive;

                    $desc{$pkg}{$suite}{$version} = $desc;
                } else {
                    # For virtual entries the description field carries a
                    # whitespace separated list, stored as the providers.
                    $provided_by{$pkg}{$suite} = [ split /\s+/, $desc ];
                }
            }

            # A name may occur as a real package, a virtual one, or both.
            my %uniq_pkgs = map { $_ => 1 } (keys %pkgs, keys %provided_by);
            my @pkgs = sort keys %uniq_pkgs;
            process_packages( $page_content, 'packages', \%pkgs, \@pkgs, $opts, \@keywords,
                              \&process_package, \%provided_by,
                              \%archives, \%sect, \%subsect,
                              \%desc );
        }
    } # if @results
} # sub do_search
146
# Group the found packages into result categories and store them in $content.
#
# Parameters:
#   $content    - hash ref; receives {results}, {categories} and, when
#                 non-exact hits are suppressed, {skipped}
#   $target     - key under which each category stores its list of entries
#   $pkgs       - hash ref: package name => collected data for that package
#   $pkgs_list  - array ref of the package names to present, in order;
#                 it is not modified
#   $opts       - search options; {searchon} and {exact} are read
#   $keywords   - array ref of search keywords
#   $print_func - code ref called as
#                 $print_func->( $name, $pkgs->{$name}, <per-package slices> )
#                 whose return value(s) become the category entries
#   @func_args  - further hash refs keyed by package name; for each package
#                 the matching slice (or {}) is passed to $print_func
sub process_packages {
    my ($content, $target, $pkgs, $pkgs_list, $opts, $keywords, $print_func, @func_args) = @_;

    my @categories;
    $content->{results} = scalar @$pkgs_list;

    # An "exact hit" only makes sense for a single-keyword search.
    my $keyword = @$keywords == 1 ? $keywords->[0] : undef;

    # Work on a private copy so the caller's list stays intact.
    my @remaining = @$pkgs_list;

    my $have_exact = 0;
    # defined() rather than truthiness, so the keyword "0" is not ignored.
    if (defined $keyword && grep { $_ eq $keyword } @remaining) {
        $have_exact = 1;
        $categories[0]{name} = _g( "Exact hits" );

        $categories[0]{$target} = [ $print_func->( $keyword, $pkgs->{$keyword} || {},
                                                   map { $_->{$keyword} || {} } @func_args ) ];
        @remaining = grep { $_ ne $keyword } @remaining;
    }

    if (@remaining && (($opts->{searchon} ne 'names') || !$opts->{exact})) {
        my %cat;
        # The heading is only needed to tell this group from the exact hits.
        $cat{name} = _g( 'Other hits' ) if $have_exact;

        # Use the caller supplied key here as well; a hard-coded 'packages'
        # key would leave a stray empty list next to e.g. 'src_packages'.
        $cat{$target} = [];
        foreach my $pkg (@remaining) {
            push @{$cat{$target}}, $print_func->( $pkg, $pkgs->{$pkg} || {},
                                                  map { $_->{$pkg} || {} } @func_args );
        }
        push @categories, \%cat;
    } elsif (@remaining) {
        # Exact name search: the other hits are only counted, not listed.
        $content->{skipped} = scalar @remaining;
    }

    $content->{categories} = \@categories;
}
182
# Build the per-suite summary of one binary (or virtual) package.
#
# Parameters:
#   $pkg         - package name
#   $pkgs        - hash ref: suite => version => architecture => flag
#   $provided_by - hash ref: suite => array ref of providers
#   $archives, $sect, $subsect, $desc
#                - hash refs: suite => version => respective value
#
# Returns a hash ref { pkg => $pkg, suites => [ ... ] }. Suites are visited
# in @SUITES order and only those with package data or providers are listed.
sub process_package {
    my ($pkg, $pkgs, $provided_by, $archives, $sect, $subsect, $desc) = @_;

    my @suite_entries;

    for my $suite_name (@SUITES) {
        my %entry = ( suite => $suite_name );

        if (exists $pkgs->{$suite_name}) {
            my @ordered = version_sort( keys %{ $pkgs->{$suite_name} } );
            my $lead    = $ordered[0];

            # Suite level metadata is taken from the first sorted version.
            $entry{section}    = $sect->{$suite_name}{$lead};
            $entry{subsection} = $subsect->{$suite_name}{$lead};
            $entry{desc}       = $desc->{$suite_name}{$lead};

            my %arch_seen;
            my @version_rows;
            for my $ver (@ordered) {
                my $from_archive = $archives->{$suite_name}{$ver};

                # Each architecture is reported only once, with the first
                # version (in sorted order) that offers it.
                my @fresh_archs = grep { !$arch_seen{$_} }
                                  sort keys %{ $pkgs->{$suite_name}{$ver} };
                next unless @fresh_archs;

                push @version_rows, {
                    version       => $ver,
                    archive       => $from_archive,
                    architectures => \@fresh_archs,
                };
                $arch_seen{$_}++ for @fresh_archs;
            }
            $entry{versions} = \@version_rows;

            my $providers = $provided_by->{$suite_name};
            $entry{providers} = $providers if $providers;
        } else {
            # No real package in this suite: list it only if it is provided.
            my $providers = $provided_by->{$suite_name};
            next unless $providers;

            $entry{desc}      = _g('Virtual package');
            $entry{providers} = $providers;
        }

        push @suite_entries, \%entry;
    }

    return { pkg => $pkg, suites => \@suite_entries };
}
221
# Build the list of origins (suite/archive combinations) of one source package.
#
# Parameters:
#   $pkg      - source package name
#   $pkgs     - hash ref: suite => archive => version
#   $archives - hash ref: suite => archive => { source => real archive }
#   $sect     - hash ref: suite => archive => { source => section }
#   $subsect  - hash ref: suite => archive => { source => subsection }
#   $binaries - hash ref: suite => archive => binaries value
#
# Returns a hash ref { pkg => $pkg, origins => [ ... ] } with one origin per
# suite/archive pair present in $pkgs, in @SUITES then @ARCHIVES order.
sub process_src_package {
    my ($pkg, $pkgs, $archives, $sect, $subsect, $binaries) = @_;

    my @origins;

    for my $suite_name (@SUITES) {
        for my $archive_name (@ARCHIVES) {
            next unless exists $pkgs->{$suite_name}{$archive_name};

            push @origins, {
                version      => $pkgs->{$suite_name}{$archive_name},
                suite        => $suite_name,
                archive      => $archive_name,
                section      => $sect->{$suite_name}{$archive_name}{source},
                subsection   => $subsect->{$suite_name}{$archive_name}{source},
                real_archive => $archives->{$suite_name}{$archive_name}{source},
                binaries     => $binaries->{$suite_name}{$archive_name},
            };
        }
    }

    return { pkg => $pkg, origins => \@origins };
}

1;
247
248 1;