Remove obsolete descriptions.txt previously used for fulltext search
[deb/packages.git] / lib / Packages / DoSearch.pm
1 package Packages::DoSearch;
2
3 use strict;
4 use warnings;
5
6 use Benchmark ':hireswallclock';
7 use DB_File;
8 use URI::Escape;
9 use HTML::Entities;
10 use Exporter;
11 our @ISA = qw( Exporter );
12 our @EXPORT = qw( do_search );
13
14 use Deb::Versions;
15 use Packages::I18N::Locale;
16 use Packages::Search qw( :all );
17 use Packages::CGI qw( :DEFAULT );
18 use Packages::DB;
19 use Packages::Config qw( $DBDIR @SUITES @ARCHIVES $ROOT );
20
21 sub do_search {
22     my ($params, $opts, $html_header, $page_content) = @_;
23
24     $Params::Search::too_many_hits = 0;
25
26     if ($params->{errors}{keywords}) {
27         fatal_error( _g( "keyword not valid or missing" ) );
28         $opts->{keywords} = [];
29     } elsif (grep { length($_) < 2 } @{$opts->{keywords}}) {
30         fatal_error( _g( "keyword too short (keywords need to have at least two characters)" ) );
31     }
32
33     my @keywords = @{$opts->{keywords}};
34     my $searchon = $opts->{searchon};
35     $page_content->{search_keywords} = \@keywords;
36
37     my $st0 = new Benchmark;
38     my (@results, @non_results);
39
40     unless (@Packages::CGI::fatal_errors) {
41
42         if ($searchon eq 'names') {
43             if ($opts->{source}) {
44                 do_names_search( [ @keywords ], \%sources, $sp_obj,
45                                  \&read_src_entry_all, $opts,
46                                  \@results, \@non_results );
47             } else {
48                 do_names_search( [ @keywords ], \%packages, $p_obj,
49                                  \&read_entry_all, $opts,
50                                  \@results, \@non_results );
51             }
52         } else {
53             do_names_search( [ @keywords ], \%packages, $p_obj,
54                              \&read_entry_all, $opts,
55                              \@results, \@non_results );
56             my $fts1 = new Benchmark;
57             do_xapian_search( [ @keywords ], "$DBDIR/xapian/",
58                                 \%did2pkg, \%packages,
59                                 \&read_entry_all, $opts,
60                                 \@results, \@non_results );
61             my $fts2 = new Benchmark;
62             my $fts_xapian = timediff($fts2,$fts1);
63             debug( "Fulltext search took ".timestr($fts_xapian) )
64                 if DEBUG;
65         }
66     }
67
68 #    use Data::Dumper;
69 #    debug( join( "", Dumper( \@results, \@non_results )) ) if DEBUG;
70     my $st1 = new Benchmark;
71     my $std = timediff($st1, $st0);
72     debug( "Search took ".timestr($std) ) if DEBUG;
73
74     $page_content->{too_many_hits} = $Packages::Search::too_many_hits;
75     #FIXME: non_results can't be compared to results since it is
76     # not normalized to unique packages
77     $page_content->{non_results} = scalar @non_results;
78
79     if (@results) {
80         my (%pkgs, %subsect, %sect, %archives, %desc, %binaries, %provided_by);
81
82         unless ($opts->{source}) {
83             foreach (@results) {
84                 my ($pkg_t, $archive, $suite, $arch, $section, $subsection,
85                     $priority, $version, $desc) = @$_;
86
87                 my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint
88                 if ($arch ne 'virtual') {
89                     $pkgs{$pkg}{$suite}{$version}{$arch} = 1;
90                     $subsect{$pkg}{$suite}{$version} = $subsection;
91                     $sect{$pkg}{$suite}{$version} = $section;
92                     $archives{$pkg}{$suite}{$version} ||= $archive;
93
94                     $desc{$pkg}{$suite}{$version} = $desc;
95                 } else {
96                     $provided_by{$pkg}{$suite} = [ split /\s+/, $desc ];
97                 }
98             }
99
100             my %uniq_pkgs = map { $_ => 1 } (keys %pkgs, keys %provided_by);
101             my @pkgs = sort keys %uniq_pkgs;
102             process_packages( $page_content, 'packages', \%pkgs, \@pkgs, $opts, \@keywords,
103                               \&process_package, \%provided_by,
104                               \%archives, \%sect, \%subsect,
105                               \%desc );
106
107         } else { # unless $opts->{source}
108             foreach (@results) {
109                 my ($pkg, $archive, $suite, $section, $subsection, $priority,
110                     $version) = @$_;
111
112                 my $real_archive = '';
113                 if ($archive =~ /^(security|non-US)$/) {
114                     $real_archive = $archive;
115                     $archive = 'us';
116                 }
117                 if (($real_archive eq $archive) &&
118                     $pkgs{$pkg}{$suite}{$archive} &&
119                     (version_cmp( $pkgs{$pkg}{$suite}{$archive}, $version ) >= 0)) {
120                     next;
121                 }
122                 $pkgs{$pkg}{$suite}{$archive} = $version;
123                 $subsect{$pkg}{$suite}{$archive}{source} = $subsection;
124                 $sect{$pkg}{$suite}{$archive}{source} = $section
125                     unless $section eq 'main';
126                 $archives{$pkg}{$suite}{$archive}{source} = $real_archive
127                     if $real_archive;
128
129                 $binaries{$pkg}{$suite}{$archive} = find_binaries( $pkg, $archive, $suite, \%src2bin );
130             }
131
132             my @pkgs = sort keys %pkgs;
133             process_packages( $page_content, 'src_packages', \%pkgs, \@pkgs, $opts, \@keywords,
134                               \&process_src_package, \%archives,
135                               \%sect, \%subsect, \%binaries );
136         } # else unless $opts->{source}
137     } # if @results
138 } # sub do_search
139
140 sub process_packages {
141     my ($content, $target, $pkgs, $pkgs_list, $opts, $keywords, $print_func, @func_args) = @_;
142
143     my @categories;
144     $content->{results} = scalar @$pkgs_list;
145
146     my $keyword;
147     $keyword = $keywords->[0] if @$keywords == 1;
148             
149     my $have_exact;
150     if ($keyword && grep { $_ eq $keyword } @$pkgs_list) {
151         $have_exact = 1;
152         $categories[0]{name} = _g( "Exact hits" );
153
154         $categories[0]{$target} = [ &$print_func( $keyword, $pkgs->{$keyword}||{},
155                                                    map { $_->{$keyword}||{} } @func_args ) ];
156         @$pkgs_list = grep { $_ ne $keyword } @$pkgs_list;
157     }
158             
159     if (@$pkgs_list && (($opts->{searchon} ne 'names') || !$opts->{exact})) {
160         my %cat;
161         $cat{name} = _g( 'Other hits' ) if $have_exact;
162         
163         $cat{packages} = [];
164         foreach my $pkg (@$pkgs_list) {
165             push @{$cat{$target}}, &$print_func( $pkg, $pkgs->{$pkg}||{},
166                                                  map { $_->{$pkg}||{} } @func_args );
167         }
168         push @categories, \%cat;
169     } elsif (@$pkgs_list) {
170         $content->{skipped} = scalar @$pkgs_list;
171     }
172
173     $content->{categories} = \@categories;
174 }
175
176 sub process_package {
177     my ($pkg, $pkgs, $provided_by, $archives, $sect, $subsect, $desc) = @_;
178
179     my %pkg = ( pkg => $pkg,
180                 suites => [] );
181
182     foreach my $suite (@SUITES) {
183         my %suite = ( suite => $suite );
184         if (exists $pkgs->{$suite}) {
185             my %archs_printed;
186             my @versions = version_sort keys %{$pkgs->{$suite}};
187             $suite{section} = $sect->{$suite}{$versions[0]};
188             $suite{subsection} = $subsect->{$suite}{$versions[0]};
189             $suite{desc} = $desc->{$suite}{$versions[0]};
190             $suite{versions} = [];
191                 
192             foreach my $v (@versions) {
193                 my %version;
194                 $version{version} = $v;
195                 $version{archive} = $archives->{$suite}{$v};
196                     
197                 $version{architectures} = [ grep { !$archs_printed{$_} } sort keys %{$pkgs->{$suite}{$v}} ];
198                 push @{$suite{versions}}, \%version if @{$version{architectures}};
199
200                 $archs_printed{$_}++ foreach @{$version{architectures}};
201             }
202             if (my $p =  $provided_by->{$suite}) {
203                 $suite{providers} = $p;
204             }
205         } elsif (my $p =  $provided_by->{$suite}) {
206             $suite{desc} = _g('Virtual package');
207             $suite{providers} = $p;
208         }
209         push @{$pkg{suites}}, \%suite if $suite{versions} || $suite{providers};
210     }
211
212     return \%pkg;
213 }
214
215 sub process_src_package {
216     my ($pkg, $pkgs, $archives, $sect, $subsect, $binaries) = @_;
217
218     my %pkg = ( pkg => $pkg,
219                 origins => [] );
220
221     foreach my $suite (@SUITES) {
222         foreach my $archive (@ARCHIVES) {
223             if (exists $pkgs->{$suite}{$archive}) {
224                 my %origin;
225                 $origin{version} = $pkgs->{$suite}{$archive};
226                 $origin{suite} = $suite;
227                 $origin{archive} = $archive; 
228                 $origin{section} = $sect->{$suite}{$archive}{source};
229                 $origin{subsection} = $subsect->{$suite}{$archive}{source};
230                 $origin{real_archive} = $archives->{$suite}{$archive}{source};
231
232                 $origin{binaries} = $binaries->{$suite}{$archive};
233                 push @{$pkg{origins}}, \%origin;
234             }
235         }
236     }
237
238     return \%pkg;
239 }
240
241 1;