3 # search_packages.pl -- CGI interface to the Packages files on packages.debian.org
5 # Copyright (C) 1998 James Treacy
6 # Copyright (C) 2000, 2001 Josip Rodin
7 # Copyright (C) 2001 Adam Heath
8 # Copyright (C) 2004 Martin Schulze
9 # Copyright (C) 2004-2006 Frank Lichtenheld
11 # use is allowed under the terms of the GNU Public License (GPL)
12 # see http://www.fsf.org/copyleft/gpl.html for a copy of the license
15 use CGI qw( -oldstyle_urls );
16 use CGI::Carp qw( fatalsToBrowser );
26 use Packages::Search qw( :all );
27 use Packages::HTML ();
# URL of this CGI script (taken from Packages::HTML) and the base URLs used
# when building links.
29 my $thisscript = $Packages::HTML::SEARCH_CGI;
30 my $HOME = "http://www.debian.org";
32 my $SEARCHPAGE = "http://packages.debian.org/";
# Known suites, sections, archives and architectures. The order of @SUITES is
# also the order in which suites are listed in the result output.
33 my @SUITES = qw( oldstable stable testing unstable experimental );
34 my @SECTIONS = qw( main contrib non-free );
35 my @ARCHIVES = qw( us security installer );
36 my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
37 kfreebsd-i386 mips mipsel powerpc s390 sparc );
# Lookup hashes (name => 1) built from the lists above for membership tests.
38 my %SUITES = map { $_ => 1 } @SUITES;
39 my %SECTIONS = map { $_ => 1 } @SECTIONS;
40 my %ARCHIVES = map { $_ => 1 } @ARCHIVES;
41 my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES;
# Pin PATH to a fixed value (presumably for taint-safe execution -- confirm).
43 $ENV{PATH} = "/bin:/usr/bin";
45 # Read in all the variables set by the form
# When the first command-line argument is 'php', the CGI object is built
# from STDIN.
47 if ($ARGV[0] && ($ARGV[0] eq 'php')) {
48 $input = new CGI(\*STDIN);
# Start timestamp; the timing figures reported later are measured from here.
53 my $pet0 = new Benchmark;
54 # use this to disable debugging in production mode completely
55 my $debug_allowed = 1;
# The debug level comes from the "debug" request parameter, but only when
# debugging is allowed above; it falls back to 0 when undefined.
56 my $debug = $debug_allowed && $input->param("debug");
57 $debug = 0 if not defined($debug);
58 #$Packages::Search::debug = 1 if $debug > 1;
# An optional 'path' parameter is split on '/' and lower-cased. Each component
# that names a known section, archive or architecture is copied into the
# corresponding request parameter; the first branch sets 'suite'
# (NOTE(review): its condition is not visible here, presumably a %SUITES test).
60 if (my $path = $input->param('path')) {
61 my @components = map { lc $_ } split /\//, $path;
63 foreach (@components) {
65 $input->param('suite', $_);
66 } elsif ($SECTIONS{$_}) {
67 $input->param('section', $_);
68 } elsif ($ARCHIVES{$_}) {
69 $input->param('archive', $_);
70 }elsif ($ARCHITECTURES{$_}) {
71 $input->param('arch', $_);
# Variables for the individual request parameters; @archs is bound to the
# 'arch' definition below through its 'var' key.
76 my ( $format, $keyword, $case, $subword, $exact, $searchon,
77 @suites, @sections, @archs );
# Declarative description of the accepted request parameters, handed to
# Packages::Search::parse_params below. Keys visible here: 'default', 'match'
# (a regex with one capture group), 'alias', 'array' (a separator string),
# 'var' (a reference to a variable) and 'replace' (maps a keyword such as
# 'all' or 'any' to a reference to one of the lists of known values).
# NOTE(review): the exact meaning of each key is defined by
# Packages::Search::parse_params, which is not part of this file -- confirm there.
79 my %params_def = ( keywords => { default => undef,
80 match => '^\s*([-+\@\w\/.:]+)\s*$',
82 suite => { default => 'stable', match => '^(\w+)$',
83 alias => 'version', array => ',',
85 replace => { all => \@SUITES } },
86 case => { default => 'insensitive', match => '^(\w+)$',
88 # official => { default => 0, match => '^(\w+)$' },
89 # use_cache => { default => 1, match => '^(\w+)$' },
90 subword => { default => 0, match => '^(\w+)$',
92 exact => { default => undef, match => '^(\w+)$',
94 searchon => { default => 'all', match => '^(\w+)$',
96 section => { default => 'all', match => '^([\w-]+)$',
97 alias => 'release', array => ',',
99 replace => { all => \@SECTIONS } },
100 arch => { default => 'any', match => '^(\w+)$',
101 array => ',', var => \@archs, replace =>
102 { any => \@ARCHITECTURES } },
103 archive => { default => 'all', match => '^(\w+)$',
104 array => ',', replace =>
105 { all => \@ARCHIVES } },
106 format => { default => 'html', match => '^(\w+)$',
# Parse the request against the definition above; later code reads
# $params{errors} and $params{values} from the result.
110 my %params = Packages::Search::parse_params( $input, \%params_def, \%opts );
112 #XXX: Don't use alternative output formats yet
# Emit the HTTP header when the output format is HTML.
114 if ($format eq 'html') {
115 print $input->header;
# Buffers for the messages collected while the request is processed.
118 my (@errors, @debug, @msgs, @hints);
# Debug helper: queue the message (first argument) when the current debug
# level is greater than the message's level (second argument, default 0).
126 my $lvl = $_[1] || 0;
127 push(@debug, $_[0]) if $debug > $lvl;
# Error output: nothing is printed when no errors were collected; otherwise
# each one is rendered as a highlighted paragraph.
133 return unless @errors;
136 print "<p style=\"background-color:#F99;font-weight:bold;padding:0.5em;margin:0;\">$_</p>";
# Debug output: only when debugging is enabled and messages were queued.
# The style attribute used to contain a stray double quote after "80%", which
# ended the attribute early and dropped the border declaration.
141 return unless $debug && @debug;
142 print '<div style="font-size:80%;border:solid thin grey">';
143 print '<h2>Debugging:</h2><pre>';
147 print '</pre></div>';
# Hint output: nothing is printed when no hints were collected.
151 return unless @hints;
154 print "<p style=\"background-color:#FF9;padding:0.5em;margin:0\">$_</p>";
# Report an invalid or missing keyword.
164 if ($params{errors}{keywords}) {
165 error( "Error: keyword not valid or missing" );
# The search is case sensitive unless $case contains "insensitive".
168 my $case_bool = ( $case !~ /insensitive/ );
# When 'exact' was not given it defaults to the opposite of 'subword'.
169 $exact = !$subword unless defined $exact;
# Hash sets of the selected suites, sections and architectures; the entry
# readers use them to filter database entries.
170 $opts{h_suites} = { map { $_ => 1 } @suites };
171 $opts{h_sections} = { map { $_ => 1 } @sections };
172 $opts{h_archs} = { map { $_ => 1 } @archs };
174 # for URL construction
175 my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
176 my $sections_param = join ',', @{$params{values}{section}{no_replace}};
177 my $archs_param = join ',', @{$params{values}{arch}{no_replace}};
# HTML-entity-encoded copies of the values for embedding in the page.
180 my $keyword_enc = encode_entities $keyword;
181 my $searchon_enc = encode_entities $searchon;
182 my $suites_enc = encode_entities join ', ', @{$params{values}{suite}{no_replace}};
183 my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_replace}};
184 my $archs_enc = encode_entities join ', ', @{$params{values}{arch}{no_replace}};
# Record how long parameter evaluation took (measured from $pet0) as a debug message.
185 my $pet1 = new Benchmark;
186 my $petd = timediff($pet1, $pet0);
187 debug( "Parameter evaluation took ".timestr($petd) );
189 # read the configuration
# The values of topdir and root are picked out of ../config.sh, which is
# matched as name=value lines; failure to open the file is reported through
# error().
191 if (!open (C, "../config.sh")) {
192 error( "Internal Error: Cannot open configuration file." );
# The value may be wrapped in double quotes. The capture is non-greedy so that
# a closing quote (and trailing whitespace) is left to the rest of the
# pattern instead of ending up inside the captured value.
195 $topdir = $1 if /^\s*topdir="?(.*?)"?\s*$/;
196 $ROOT = $1 if /^\s*root="?(.*?)"?\s*$/;
# Directory holding the database files opened by the search routines.
200 my $DBDIR = $topdir . "/files/db";
# Set to 1 when the search targets source package names.
201 my $search_on_sources = 0;
# Start timestamp for the search timing reported after the search.
203 my $st0 = new Benchmark;
206 if ($searchon eq 'sourcenames') {
207 $search_on_sources = 1;
# Print the page header via Packages::HTML::header, passing the current
# search values along (NOTE(review): presumably used to pre-fill the search
# form -- confirm in Packages::HTML).
211 print Packages::HTML::header( title => 'Package Search Results' ,
213 title_tag => 'Debian Package Search Results',
214 print_title_above => 1,
215 print_search_field => 'packages',
216 search_field_values => {
217 keywords => $keyword_enc,
218 searchon => $searchon,
220 suite => $suites_enc,
221 section => $sections_enc,
# Entry reader that filters on suite, architecture and section. Presumably
# the body of read_entry, which is passed as a callback to the search
# routines -- the sub header is not visible here.
# Arguments: $hash    - hash reference to look the key up in
#            $key     - key to look up
#            $results - array reference that accepted entries are appended to
#            $opts    - options; h_suites, h_archs and h_sections are used as filters
230 my ($hash, $key, $results, $opts) = @_;
# A missing key is treated as an empty value and yields no entries.
231 my $result = $hash->{$key} || '';
# The value is a NUL-separated list of entries, each split on whitespace into
# at most 7 fields; the first three are checked as suite, architecture and section.
232 foreach (split /\000/, $result) {
233 my @data = split ( /\s/, $_, 7 );
234 debug( "Considering entry ".join( ':', @data), 2);
# Keep the entry when its suite and section were selected and its
# architecture was either selected or is 'all'.
235 if ($opts->{h_suites}{$data[0]}
236 && ($opts->{h_archs}{$data[1]} || $data[1] eq 'all')
237 && $opts->{h_sections}{$data[2]}) {
238 debug( "Using entry ".join( ':', @data), 2);
# Each result row is the key followed by the entry's fields.
239 push @$results, [ $key, @data ];
# Entry reader that filters on suite and section only. Presumably the body of
# read_src_entry, which is passed as a callback for source-name searches --
# the sub header is not visible here. Arguments are the same as for the
# reader above: hash reference, key, results array reference, options.
244 my ($hash, $key, $results, $opts) = @_;
# A missing key is treated as an empty value and yields no entries.
245 my $result = $hash->{$key} || '';
# NUL-separated entries, each split on whitespace into at most 5 fields; the
# first two are checked as suite and section.
246 foreach (split /\000/, $result) {
247 my @data = split ( /\s/, $_, 5 );
248 debug( "Considering entry ".join( ':', @data), 2);
249 if ($opts->{h_suites}{$data[0]} && $opts->{h_sections}{$data[1]}) {
250 debug( "Using entry ".join( ':', @data), 2);
# Each result row is the key followed by the entry's fields.
251 push @$results, [ $key, @data ];
# do_names_search( $keyword, $file, $postfix_file, $read_entry, $opts )
#
# Searches names in the DB_File database $DBDIR/$file. With $opts->{exact}
# the keyword itself is looked up; otherwise candidate names are first
# collected from the database $DBDIR/$postfix_file. Entries are gathered
# through the $read_entry callback, which is called with the tied hash, a
# name, the results array reference and $opts. Dies when a database cannot
# be tied. Callers dereference the return value as an array reference.
255 sub do_names_search {
256 my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_;
# Lower-case the keyword unless the search is case sensitive.
259 $keyword = lc $keyword unless $opts->{case_bool};
261 my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE
262 or die "couldn't tie DB $DBDIR/$file: $!";
264 if ($opts->{exact}) {
265 &$read_entry( \%packages, $keyword, \@results, $opts );
# Non-exact search: position a cursor in the second database starting from the
# keyword (R_CURSOR) and walk forward (R_NEXT) for as long as the current key
# still contains the keyword.
267 my ($key, $prefixes) = ($keyword, '');
269 my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE
270 or die "couldn't tie postfix db $DBDIR/$postfix_file: $!";
271 $p_obj->seq( $key, $prefixes, R_CURSOR );
272 while (index($key, $keyword) >= 0) {
# A value that starts with \001 followed by digits contributes that number to
# the too-many-hits counter.
273 if ($prefixes =~ /^\001(\d+)/o) {
274 $too_many_hits += $1;
# Here the value is treated as a NUL-separated list of prefixes that are put
# in front of the key; '^' stands for the empty prefix.
276 foreach (split /\000/o, $prefixes) {
277 $_ = '' if $_ eq '^';
278 debug( "add word $_$key", 2);
# Stop at the end of the database, or as soon as the hit limit is reached.
282 last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0;
283 last if $too_many_hits or keys %pkgs >= 100;
# On overflow the collected names are counted as omitted hits and only the
# keyword itself is looked up.
286 my $no_results = keys %pkgs;
287 if ($too_many_hits || ($no_results >= 100)) {
288 $too_many_hits += $no_results;
289 %pkgs = ( $keyword => 1 );
# Read the entries for every remaining candidate name, in sorted order.
291 foreach my $pkg (sort keys %pkgs) {
292 &$read_entry( \%packages, $pkg, \@results, $opts );
# do_fulltext_search( $keyword, $file, $mapping, $lookup, $read_entry, $opts )
#
# Scans the text file $DBDIR/$file for lines matching the keyword, maps the
# matches to package names through the database $DBDIR/$mapping and reads the
# package entries from the database $DBDIR/$lookup via the $read_entry
# callback. Dies when the file cannot be opened or a database cannot be tied.
# Callers dereference the return value as an array reference.
297 sub do_fulltext_search {
# The first parameter used to be misspelled "$keword", so the regexes below
# silently used the file-level $keyword instead of the argument.
298 my ($keyword, $file, $mapping, $lookup, $read_entry, $opts) = @_;
# Build the match regex: the keyword is always taken literally (\Q...\E),
# 'exact' adds word boundaries, and the case-insensitive variants add /i.
303 if ($opts->{case_bool}) {
304 if ($opts->{exact}) {
305 $regex = qr/\b\Q$keyword\E\b/o;
307 $regex = qr/\Q$keyword\E/o;
310 if ($opts->{exact}) {
311 $regex = qr/\b\Q$keyword\E\b/io;
313 $regex = qr/\Q$keyword\E/io;
317 open DESC, '<', "$DBDIR/$file"
318 or die "couldn't open $DBDIR/$file: $!";
# Lines that do not match the regex are skipped.
320 $_ =~ $regex or next;
321 debug( "Matched line $.", 2);
326 tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE
327 or die "couldn't tie DB $DBDIR/$lookup: $!";
328 tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE
329 or die "couldn't tie DB $DBDIR/$mapping: $!";
# Each element of @lines is a key into the mapping database. Its value is a
# NUL-separated list of entries with up to 3 whitespace-separated fields: the
# first is used as the package name, the third is checked against the
# selected architectures.
332 foreach my $l (@lines) {
333 my $result = $did2pkg{$l};
334 foreach (split /\000/o, $result) {
335 my @data = split /\s/, $_, 3;
336 next unless $opts->{h_archs}{$data[2]};
337 $tmp_results{$data[0]}++;
# Read the entries of every package name found.
340 foreach my $pkg (keys %tmp_results) {
341 &$read_entry( \%packages, $pkg, \@results, $opts );
# Looks a source package up in sources_packages.db and collects the entries
# that belong to the given suite. Presumably the body of find_binaries, which
# is called with ($pkg, $suite) when source results are collected -- the sub
# header is not visible here. Returns an array reference holding the keys of
# %bins. Dies when the database cannot be tied.
347 my ($pkg, $suite) = @_;
349 tie my %src2bin, 'DB_File', "$DBDIR/sources_packages.db", O_RDONLY, 0666, $DB_BTREE
350 or die "couldn't open $DBDIR/sources_packages.db: $!";
# A missing key is treated as an empty value.
352 my $bins = $src2bin{$pkg} || '';
# NUL-separated entries, each split on whitespace into at most 4 fields; the
# first field is compared against the requested suite.
354 foreach (split /\000/o, $bins) {
355 my @data = split /\s/, $_, 4;
357 if ($data[0] eq $suite) {
362 return [ keys %bins ];
# Run the search selected by $searchon: binary package names, source package
# names, or -- in the remaining case -- package names followed by a full-text
# search of the descriptions. All results are appended to @results.
365 if ($searchon eq 'names') {
366 push @results, @{ do_names_search( $keyword, 'packages_small.db',
367 'package_postfixes.db',
368 \&read_entry, \%opts ) };
369 } elsif ($searchon eq 'sourcenames') {
370 push @results, @{ do_names_search( $keyword, 'sources_small.db',
371 'source_postfixes.db',
372 \&read_src_entry, \%opts ) };
374 push @results, @{ do_names_search( $keyword, 'packages_small.db',
375 'package_postfixes.db',
376 \&read_entry, \%opts ) };
377 push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt',
378 'descriptions_packages.db',
380 \&read_entry, \%opts ) };
# Record how long the search took (measured from $st0) as a debug message.
383 my $st1 = new Benchmark;
384 my $std = timediff($st1, $st0);
385 debug( "Search took ".timestr($std) );
# For HTML output, queue a message that summarises the query: what was
# searched for, and in which suites, sections and architectures.
387 if ($format eq 'html') {
# 'all' / 'any' are spelled out in words; anything else is listed verbatim
# using the HTML-encoded values.
388 my $suite_wording = $suites_enc eq "all" ? "all suites"
389 : "suite(s) <em>$suites_enc</em>";
390 my $section_wording = $sections_enc eq 'all' ? "all sections"
391 : "section(s) <em>$sections_enc</em>";
392 my $arch_wording = $archs_enc eq 'any' ? "all architectures"
393 : "architecture(s) <em>$archs_enc</em>";
# Name searches (binary or source) get their own wording; the other message
# covers searches in package names and descriptions.
394 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
395 my $source_wording = $search_on_sources ? "source " : "";
396 my $exact_wording = $exact ? "named" : "that names contain";
397 msg( "You have searched for ${source_wording}packages $exact_wording <em>$keyword_enc</em> in $suite_wording, $section_wording, and $arch_wording." );
399 my $exact_wording = $exact ? "" : " (including subword matching)";
400 msg( "You have searched for <em>$keyword_enc</em> in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording." );
# Tell the user when the search was too wide and results were omitted.
404 if ($too_many_hits) {
405 error( "Your search was too wide so we will only display exact matches. At least <em>$too_many_hits</em> results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." );
# Error messages and hints for a search that found nothing, for HTML output.
# NOTE(review): the test on the result list that guards these messages is
# not visible here.
409 if ($format eq 'html') {
# URI-escaped keyword for use in the hint links below.
410 my $keyword_esc = uri_escape( $keyword );
# Name searches: a plain "not found" when nothing was restricted, otherwise a
# message that mentions the restriction.
412 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
413 if (($suites_enc eq 'all')
414 && ($archs_enc eq 'any')
415 && ($sections_enc eq 'all')) {
416 error( "Can't find that package." );
418 error( "Can't find that package, at least not in that suite ".
419 ( $search_on_sources ? "" : " and on that architecture" ) )
# Offer a link that repeats the same search with exact=0.
423 hint( "You have searched only for exact matches of the package name. You can try to search for <a href=\"$thisscript?exact=0&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">package names that contain your search string</a>." );
# Messages for searches that looked for a string rather than a package name.
426 if (($suites_enc eq 'all')
427 && ($archs_enc eq 'any')
428 && ($sections_enc eq 'all')) {
429 error( "Can't find that string." );
431 error( "Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc)." );
# Offer a link that repeats the same search with subword=1.
435 hint( "You have searched only for words exactly matching your keywords. You can try to search <a href=\"$thisscript?subword=1&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">allowing subword matching</a>." );
# Always point to the search page; the wording depends on whether another
# hint was already queued.
438 hint( ( @hints ? "Or you" : "You" )." can try a different search on the <a href=\"$SEARCHPAGE#search_packages\">Packages search page</a>." );
# Result display. Nothing is shown when there are no results.
452 return unless @results;
# Result data, indexed by package name and suite.
454 my (%pkgs, %sect, %part, %desc, %binaries);
# Binary package results: each result row is unpacked into its fields and
# recorded per package, suite and version.
456 unless ($search_on_sources) {
458 my ($pkg_t, $suite, $arch, $section, $subsection,
459 $priority, $version, $desc) = @$_;
461 my ($pkg) = $pkg_t =~ m/^(.+)/; # untaint
462 $pkgs{$pkg}{$suite}{$version}{$arch} = 1;
463 $sect{$pkg}{$suite}{$version} = $subsection;
# Only sections other than 'main' are recorded in %part.
464 $part{$pkg}{$suite}{$version} = $section
465 unless $section eq 'main';
467 $desc{$pkg}{$suite}{$version} = $desc;
# HTML output: multipageheader prints the summary and returns the range of
# result positions shown on the current page.
470 if ($format eq 'html') {
471 my ($start, $end) = multipageheader( scalar keys %pkgs );
474 foreach my $pkg (sort keys %pkgs) {
# Packages outside the current page window are skipped.
476 next if $count < $start or $count > $end;
477 printf "<h3>Package %s</h3>\n", $pkg;
# One list item per suite, in @SUITES order. Subsection, description and the
# section marker are taken from the first element of the sorted version list.
479 foreach my $suite (@SUITES) {
480 if (exists $pkgs{$pkg}{$suite}) {
481 my @versions = version_sort keys %{$pkgs{$pkg}{$suite}};
483 if ($part{$pkg}{$suite}{$versions[0]}) {
484 $part_str = "[<span style=\"color:red\">$part{$pkg}{$suite}{$versions[0]}</span>]";
486 printf "<li><a href=\"$ROOT/%s/%s\">%s</a> (%s): %s %s\n",
487 $suite, $pkg, $suite, $sect{$pkg}{$suite}{$versions[0]},
488 $desc{$pkg}{$suite}{$versions[0]}, $part_str;
# Then one line per version, listing the architectures it is available for.
490 foreach my $v (@versions) {
491 printf "<br>%s: %s\n",
492 $v, join (" ", (sort keys %{$pkgs{$pkg}{$suite}{$v}}) );
# Source package results: each result row is unpacked and recorded per
# package and suite. (The end of this unpacking list is not visible here.)
502 my ($pkg, $suite, $section, $subsection, $priority,
505 $pkgs{$pkg}{$suite} = $version;
506 $sect{$pkg}{$suite}{source} = $subsection;
# Only sections other than 'main' are recorded in %part.
507 $part{$pkg}{$suite}{source} = $section
508 unless $section eq 'main';
# Look up the entries recorded for this source package in this suite; they
# are listed as binary packages in the output below.
510 $binaries{$pkg}{$suite} = find_binaries( $pkg, $suite );
# HTML output: multipageheader prints the summary and returns the range of
# result positions shown on the current page.
513 if ($format eq 'html') {
514 my ($start, $end) = multipageheader( scalar keys %pkgs );
517 foreach my $pkg (sort keys %pkgs) {
# Packages outside the current page window are skipped.
519 next if ($count < $start) or ($count > $end);
520 printf "<h3>Source package %s</h3>\n", $pkg;
# One list item per suite, in @SUITES order, showing subsection, version and
# the section marker for sections other than 'main'.
522 foreach my $suite (@SUITES) {
523 if (exists $pkgs{$pkg}{$suite}) {
525 if ($part{$pkg}{$suite}{source}) {
526 $part_str = "[<span style=\"color:red\">$part{$pkg}{$suite}{source}</span>]";
528 printf( "<li><a href=\"$ROOT/%s/source/%s\">%s</a> (%s): %s %s",
529 $suite, $pkg, $suite, $sect{$pkg}{$suite}{source},
530 $pkgs{$pkg}{$suite}, $part_str );
# Followed by a comma-separated list of links to the binary packages.
532 print "<br>Binary packages: ";
534 foreach my $bp (@{$binaries{$pkg}{$suite}}) {
535 my $bp_link = sprintf( "<a href=\"$ROOT/%s/%s\">%s</a>",
536 $suite, uri_escape( $bp ), $bp );
537 push @bp_links, $bp_link;
539 print join( ", ", @bp_links );
# Page navigation line for the displayed packages.
547 printindexline( scalar keys %pkgs );
# Page navigation line. Presumably the body of printindexline, which is
# called with the number of results -- the sub header is not visible here.
553 my $no_results = shift;
# When there are more results than $opts{number}, build the line from the
# previous-page link, the page index and the next-page link.
556 if ($no_results > $opts{number}) {
558 $index_line = prevlink($input,\%params)." | ".
559 indexline( $input, \%params, $no_results)." | ".
560 nextlink($input,\%params, $no_results);
# The line is printed as a centred paragraph.
562 print "<p style=\"text-align:center\">$index_line</p>";
# multipageheader( $no_results )
#
# Prints the "Found N matching packages" summary, the page navigation line
# and, for more than 100 results, the "Results per page" links.
# Returns ( $start, $end ), which callers use as the range of result
# positions to display on the current page.
566 sub multipageheader {
567 my $no_results = shift;
# A page size of "all" (case-insensitive) is replaced by the number of results.
570 if ($opts{number} =~ /^all$/i) {
573 $opts{number} = $no_results;
# The page window is taken from the parsed parameters, and its end is clamped
# to the number of results.
575 $start = Packages::Search::start( \%params );
576 $end = Packages::Search::end( \%params );
577 if ($end > $no_results) { $end = $no_results; }
580 print "<p>Found <em>$no_results</em> matching packages,";
# Singular wording when the window holds exactly one package.
581 if ($end == $start) {
582 print " displaying package $end.</p>";
584 print " displaying packages $start to $end.</p>";
587 printindexline( $no_results );
# Results-per-page selector: the current page size is shown as plain text,
# the other choices as links.
589 if ($no_results > 100) {
590 print "<p>Results per page: ";
593 if ($opts{number} == $_) {
594 push @resperpagelinks, $_;
596 push @resperpagelinks, resperpagelink($input,\%params,$_);
# The "all" choice is handled the same way, based on the final value of the
# 'number' parameter.
599 if ($params{values}{number}{final} =~ /^all$/i) {
600 push @resperpagelinks, "all";
602 push @resperpagelinks, resperpagelink($input, \%params,"all");
604 print join( " | ", @resperpagelinks )."</p>";
606 return ( $start, $end );
# Print the total time spent on the page, measured from $pet0.
611 my $pete = new Benchmark;
612 my $petd = timediff($pete, $pet0);
613 print "Total page evaluation took ".timestr($petd)."<br>"
# Fetch the page trailer and replace its LAST_MODIFIED_DATE placeholder with
# the result of gmtime() (the /e flag evaluates the replacement as code).
616 my $trailer = Packages::HTML::trailer( $ROOT );
617 $trailer =~ s/LAST_MODIFIED_DATE/gmtime()/e; #FIXME