3 # search_packages.pl -- CGI interface to the Packages files on packages.debian.org
5 # Copyright (C) 1998 James Treacy
6 # Copyright (C) 2000, 2001 Josip Rodin
7 # Copyright (C) 2001 Adam Heath
8 # Copyright (C) 2004 Martin Schulze
9 # Copyright (C) 2004-2006 Frank Lichtenheld
11 # use is allowed under the terms of the GNU Public License (GPL)
12 # see http://www.fsf.org/copyleft/gpl.html for a copy of the license
15 use CGI qw( -oldstyle_urls );
16 use CGI::Carp qw( fatalsToBrowser );
26 use Packages::Search qw( :all );
27 use Packages::HTML ();
# Site configuration: the URL of this script, the base URLs used when
# building links, and the lists of known suites, sections, archives and
# architectures.
29 my $thisscript = $Packages::HTML::SEARCH_CGI;
30 my $HOME = "http://www.debian.org";
31 my $ROOT = "http://merkel.debian.org/~jeroen/pdo";
32 my $SEARCHPAGE = "http://packages.debian.org/";
33 my @SUITES = qw( oldstable stable testing unstable experimental );
34 my @SECTIONS = qw( main contrib non-free );
35 my @ARCHIVES = qw( us security installer );
36 my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
37 kfreebsd-i386 mips mipsel powerpc s390 sparc );
# Membership hashes built from the lists above (name => 1).
38 my %SUITES = map { $_ => 1 } @SUITES;
39 my %SECTIONS = map { $_ => 1 } @SECTIONS;
40 my %ARCHIVES = map { $_ => 1 } @ARCHIVES;
41 my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES;
# Pin PATH to a fixed, minimal value.
43 $ENV{PATH} = "/bin:/usr/bin";
45 # Read in all the variables set by the form
# When the first command-line argument is 'php', the CGI object is built
# from STDIN. (The other branch of this test is not visible here.)
47 if ($ARGV[0] && ($ARGV[0] eq 'php')) {
48 $input = new CGI(\*STDIN);
# Start-of-run timestamp; later timing reports are measured against it.
53 my $pet0 = new Benchmark;
54 # use this to disable debugging in production mode completely
55 my $debug_allowed = 1;
# $debug is the request's "debug" parameter, forced to 0 when debugging is
# not allowed or the parameter is absent. Values above 1 also switch on
# $Search::Param::debug.
56 my $debug = $debug_allowed && $input->param("debug");
57 $debug = 0 if not defined($debug);
58 $Search::Param::debug = 1 if $debug > 1;
60 # If you want, just print out a list of all of the variables and exit.
# In debug mode the HTTP header is printed right away.
61 print $input->header if $debug;
# Optional 'path' parameter: split it on '/', lower-case each component,
# and store a recognised component as the 'suite', 'section', 'archive'
# or 'arch' request parameter. The section, archive and arch branches
# test %SECTIONS, %ARCHIVES and %ARCHITECTURES; the test guarding the
# 'suite' branch is not visible here.
65 if (my $path = $input->param('path')) {
66 my @components = map { lc $_ } split /\//, $path;
68 foreach (@components) {
70 $input->param('suite', $_);
71 } elsif ($SECTIONS{$_}) {
72 $input->param('section', $_);
73 } elsif ($ARCHIVES{$_}) {
74 $input->param('archive', $_);
75 }elsif ($ARCHITECTURES{$_}) {
76 $input->param('arch', $_);
# Validation table for the request parameters. Every entry has a
# 'default' and a 'match' pattern; some also have an 'alias', an 'array'
# separator and a 'replace' map from a keyword ('all' / 'any') to one of
# the lists of known values. How these fields are interpreted is decided
# by Packages::Search::parse_params, which is not part of this file.
81 my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' },
82 suite => { default => 'stable', match => '^(\w+)$',
83 alias => 'version', array => ',',
84 replace => { all => \@SUITES } },
85 case => { default => 'insensitive', match => '^(\w+)$' },
86 official => { default => 0, match => '^(\w+)$' },
87 use_cache => { default => 1, match => '^(\w+)$' },
88 subword => { default => 0, match => '^(\w+)$' },
89 exact => { default => undef, match => '^(\w+)$' },
90 searchon => { default => 'all', match => '^(\w+)$' },
91 section => { default => 'all', match => '^([\w-]+)$',
92 alias => 'release', array => ',',
93 replace => { all => \@SECTIONS } },
94 arch => { default => 'any', match => '^(\w+)$',
95 array => ',', replace =>
96 { any => \@ARCHITECTURES } },
97 archive => { default => 'all', match => '^(\w+)$',
98 array => ',', replace =>
99 { all => \@ARCHIVES } },
100 format => { default => 'html', match => '^(\w+)$' },
# Parse the request against the table; results are read below from
# $params{values}{NAME}{final}, {no_replace} and $params{errors}{NAME}.
102 my %params = Packages::Search::parse_params( $input, \%params_def );
# Choose the output format and print the matching HTTP header. The XML
# variant is currently sent as text/plain.
104 my $format = $params{values}{format}{final};
105 #XXX: Don't use alternative output formats yet
108 if ($format eq 'html') {
109 print $input->header;
110 } elsif ($format eq 'xml') {
111 # print $input->header( -type=>'application/rdf+xml' );
112 print $input->header( -type=>'text/plain' );
# An invalid or missing keyword is reported for HTML output only.
115 if ($params{errors}{keywords}) {
116 print "Error: keyword not valid or missing" if $format eq 'html';
# Unpack the parsed parameter values into plain variables.
119 my $keyword = $params{values}{keywords}{final};
120 my @suites = @{$params{values}{suite}{final}};
121 my $official = $params{values}{official}{final};
122 my $use_cache = $params{values}{use_cache}{final};
123 my $case = $params{values}{case}{final};
# True when $case does not contain "insensitive", i.e. case-sensitive.
124 my $case_bool = ( $case !~ /insensitive/ );
125 my $subword = $params{values}{subword}{final};
126 my $exact = $params{values}{exact}{final};
# Without an explicit 'exact', it defaults to the negation of 'subword'.
127 $exact = !$subword unless defined $exact;
128 my $searchon = $params{values}{searchon}{final};
129 my @sections = @{$params{values}{section}{final}};
130 my @archs = @{$params{values}{arch}{final}};
131 my $page = $params{values}{page}{final};
132 my $results_per_page = $params{values}{number}{final};
# Options handed to the search subs.
133 my %opts = ( case_bool => $case_bool, exact => $exact );
135 # for URL construction
136 my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
137 my $sections_param = join ',', @{$params{values}{section}{no_replace}};
138 my $archs_param = join ',', @{$params{values}{arch}{no_replace}};
# Entity-encoded copies for embedding in the HTML output.
141 my $keyword_enc = encode_entities $keyword;
142 my $searchon_enc = encode_entities $searchon;
143 my $suites_enc = encode_entities join ', ', @{$params{values}{suite}{no_replace}};
144 my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_replace}};
145 my $archs_enc = encode_entities join ', ', @{$params{values}{arch}{no_replace}};
# Debug: report how long parameter evaluation took since $pet0.
146 my $pet1 = new Benchmark;
147 my $petd = timediff($pet1, $pet0);
148 print "DEBUG: Parameter evaluation took ".timestr($petd)."<br>" if $debug;
# For HTML output, print the page header via Packages::HTML::header,
# passing the current search values under search_field_values.
150 if ($format eq 'html') {
151 print Packages::HTML::header( title => 'Package Search Results' ,
153 title_tag => 'Debian Package Search Results',
154 print_title_above => 1,
155 print_search_field => 'packages',
156 search_field_values => {
157 keywords => $keyword_enc,
158 searchon => $searchon,
160 suite => $suites_enc,
161 section => $sections_enc,
169 # read the configuration
# Open ../config.sh; on failure an error message is printed for HTML
# output.
171 if (!open (C, "../config.sh")) {
172 print "\nInternal Error: Cannot open configuration file.\n\n"
173 if $format eq 'html';
# Extract the value of a topdir=... line. The capture is non-greedy so
# that an optional closing double quote and trailing whitespace are left
# out of $topdir instead of being swallowed into the captured value.
177 $topdir = $1 if (/^\s*topdir="?(.*?)"?\s*$/);
# Directory holding the database files read by the search subs.
181 my $DBDIR = $topdir . "/files/db";
# Search setup: flag searches on source package names, start the search
# timer, and build lookup hashes of the requested suites, sections and
# architectures; the entry readers use these hashes as filters.
182 my $search_on_sources = 0;
184 my $st0 = new Benchmark;
187 if ($searchon eq 'sourcenames') {
188 $search_on_sources = 1;
191 my %suites = map { $_ => 1 } @suites;
192 my %sections = map { $_ => 1 } @sections;
193 my %archs = map { $_ => 1 } @archs;
195 print "DEBUG: suites=@suites, sections=@sections, archs=@archs<br>"
# Body of an entry reader whose `sub` line is not visible here --
# presumably read_entry, since it filters on architecture; confirm.
# Arguments: a hash ref, a key, and an array ref that collects results.
# The value stored under the key is split on NUL into entries, and each
# entry is split on whitespace into at most 7 fields. An entry is kept
# when field 0 is a requested suite, field 1 is a requested architecture
# or 'all', and field 2 is a requested section. Kept entries are pushed
# onto the result array as [ key, fields... ].
199 my ($hash, $key, $results) = @_;
200 my $result = $hash->{$key};
201 foreach (split /\000/, $result) {
202 my @data = split ( /\s/, $_, 7 );
203 print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
204 if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
205 && $sections{$data[2]}) {
206 print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
207 push @$results, [ $key, @data ];
# Body of a second entry reader whose `sub` line is not visible here --
# presumably read_src_entry, since it has no architecture field; confirm.
# Arguments: a hash ref, a key, and an array ref that collects results.
# The value stored under the key is split on NUL into entries, and each
# entry is split on whitespace into at most 5 fields. An entry is kept
# when field 0 is a requested suite and field 1 is a requested section.
# Kept entries are pushed onto the result array as [ key, fields... ].
212 my ($hash, $key, $results) = @_;
213 my $result = $hash->{$key};
215 foreach (split /\000/, $result) {
216 my @data = split ( /\s/, $_, 5 );
217 print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
218 if ($suites{$data[0]} && $sections{$data[1]}) {
219 print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
220 push @$results, [ $key, @data ];
# Search by package name in the DB_File database "$DBDIR/$file".
#   $keyword       search term; lower-cased unless $opts->{case_bool} is set
#   $file          name of the main database under $DBDIR
#   $postfix_file  name of the postfix database under $DBDIR
#   $read_entry    code ref, called as ( \%db, $name, \@results )
#   $opts          hash ref; the keys read here are case_bool and exact
# With $opts->{exact} only the entry for $keyword itself is read. The
# remaining code walks the postfix database for keys containing $keyword,
# then reads the entry of every name collected in %pkgs.
# Dies when a database cannot be tied. Callers dereference the return
# value as an array ref; the return statement is not visible here.
224 sub do_names_search {
225 my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_;
228 $keyword = lc $keyword unless $opts->{case_bool};
230 my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE
231 or die "couldn't tie DB $DBDIR/$file: $!";
233 if ($opts->{exact}) {
234 &$read_entry( \%packages, $keyword, \@results );
236 my ($key, $prefixes) = ($keyword, '');
238 my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE
239 or die "couldn't tie postfix db $DBDIR/$postfix_file: $!";
# Position the cursor using $keyword as the search key (R_CURSOR), then
# keep stepping for as long as the current key still contains $keyword.
240 $p_obj->seq( $key, $prefixes, R_CURSOR );
241 while (index($key, $keyword) >= 0) {
# A value starting with \001 and digits adds that number to
# $too_many_hits.
242 if ($prefixes =~ /^\001(\d+)/o) {
243 $too_many_hits += $1;
# Each NUL-separated item is a prefix for $key; '^' means no prefix.
245 foreach (split /\000/o, $prefixes) {
246 $_ = '' if $_ eq '^';
247 print "DEBUG: add word $_$key<br>" if $debug > 2;
# Stop at the end of the database, when too many hits were flagged, or
# once 100 names have been collected.
251 last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0;
252 last if $too_many_hits or keys %pkgs >= 100;
# When the search was too wide, add the number of collected names to
# $too_many_hits and fall back to looking up only $keyword itself.
255 my $no_results = keys %pkgs;
256 if ($too_many_hits || ($no_results >= 100)) {
257 $too_many_hits += $no_results;
258 %pkgs = ( $keyword => 1 );
260 foreach my $pkg (sort keys %pkgs) {
261 &$read_entry( \%packages, $pkg, \@results );
# Search the description file "$DBDIR/$file" for $keyword.
#   $keyword     search term; quoted with \Q..\E, so it is matched literally
#   $file        description text file under $DBDIR
#   $mapping     DB_File database under $DBDIR; the value stored under a
#                collected line key is a NUL-separated list of records
#   $lookup      DB_File database under $DBDIR handed to $read_entry
#   $read_entry  code ref, called as ( \%db, $package, \@results )
#   $opts        hash ref; case_bool selects case-sensitive matching and
#                exact wraps the keyword in \b word boundaries
# Dies when the description file cannot be opened or a database cannot be
# tied. Callers dereference the return value as an array ref; the return
# statement is not visible here.
266 sub do_fulltext_search {
# The first parameter is named $keyword so that the regexes below are
# built from this sub's own argument, not from a file-level variable.
267 my ($keyword, $file, $mapping, $lookup, $read_entry, $opts) = @_;
272 if ($opts->{case_bool}) {
273 if ($opts->{exact}) {
274 $regex = qr/\b\Q$keyword\E\b/o;
276 $regex = qr/\Q$keyword\E/o;
280 $regex = qr/\b\Q$keyword\E\b/io;
282 $regex = qr/\Q$keyword\E/io;
286 open DESC, '<', "$DBDIR/$file"
287 or die "couldn't open $DBDIR/$file: $!";
# Input that does not match the regex is skipped.
289 $_ =~ $regex or next;
290 print "DEBUG: Matched line $.<br>" if $debug > 2;
295 tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE
296 or die "couldn't tie DB $DBDIR/$lookup: $!";
297 tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE
298 or die "couldn't tie DB $DBDIR/$mapping: $!";
# Map each collected line to its records. A record has up to 3
# whitespace-separated fields; it is counted under field 0 when field 2
# is one of the requested architectures (file-level %archs).
301 foreach my $l (@lines) {
302 my $result = $did2pkg{$l};
303 foreach (split /\000/o, $result) {
304 my @data = split /\s/, $_, 3;
305 next unless $archs{$data[2]};
306 $tmp_results{$data[0]}++;
# Read the full entry of every package found.
309 foreach my $pkg (keys %tmp_results) {
310 &$read_entry( \%packages, $pkg, \@results );
# Run the search selected by $searchon: 'names' searches binary package
# names and 'sourcenames' searches source package names; the remaining
# calls combine a binary name search with a description search.
315 if ($searchon eq 'names') {
316 push @results, @{ do_names_search( $keyword, 'packages_small.db',
317 'package_postfixes.db',
318 \&read_entry, \%opts ) };
319 } elsif ($searchon eq 'sourcenames') {
320 push @results, @{ do_names_search( $keyword, 'sources_small.db',
321 'source_postfixes.db',
322 \&read_src_entry, \%opts ) };
324 push @results, @{ do_names_search( $keyword, 'packages_small.db',
325 'package_postfixes.db',
326 \&read_entry, \%opts ) };
327 push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt',
328 'descriptions_packages.db',
330 \&read_entry, \%opts ) };
# Debug: report how long the search took since $st0.
333 my $st1 = new Benchmark;
334 my $std = timediff($st1, $st0);
335 print "DEBUG: Search took ".timestr($std)."<br>" if $debug;
# For HTML output, echo back what was searched for. The wording depends
# on whether the suite, section and architecture selections are still the
# 'all' / 'any' keywords or explicit lists.
337 if ($format eq 'html') {
338 my $suite_wording = $suites_enc eq "all" ? "all suites"
339 : "suite(s) <em>$suites_enc</em>";
340 my $section_wording = $sections_enc eq 'all' ? "all sections"
341 : "section(s) <em>$sections_enc</em>";
342 my $arch_wording = $archs_enc eq 'any' ? "all architectures"
343 : "architecture(s) <em>$archs_enc</em>";
344 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
345 my $source_wording = $search_on_sources ? "source " : "";
346 my $exact_wording = $exact ? "named" : "that names contain";
347 print "<p>You have searched for ${source_wording}packages $exact_wording <em>$keyword_enc</em> in $suite_wording, $section_wording, and $arch_wording.</p>";
349 my $exact_wording = $exact ? "" : " (including subword matching)";
350 print "<p>You have searched for <em>$keyword_enc</em> in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording.</p>";
# Tell the user when the search was too wide.
354 if ($too_many_hits) {
355 print "<p><strong>Your search was too wide so we will only display exact matches. At least <em>$too_many_hits</em> results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.</strong></p>";
# HTML messages for the case that nothing was found, with links that
# repeat the search less strictly. (The condition that selects this
# section is not visible here.)
359 if ($format eq 'html') {
360 my $keyword_esc = uri_escape( $keyword );
362 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
363 if (($suites_enc eq 'all')
364 && ($archs_enc eq 'any')
365 && ($sections_enc eq 'all')) {
366 print "<p><strong>Can't find that package.</strong></p>\n";
368 print "<p><strong>Can't find that package, at least not in that suite ".
369 ( $search_on_sources ? "" : " and on that architecture" ).
375 print "<p>You have searched only for exact matches of the package name. You can try to search for <a href=\"$thisscript?exact=0&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">package names that contain your search string</a>.</p>";
378 if (($suites_enc eq 'all')
379 && ($archs_enc eq 'any')
380 && ($sections_enc eq 'all')) {
381 print "<p><strong>Can't find that string.</strong></p>\n";
383 print "<p><strong>Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc).</strong></p>\n";
388 print "<p>You have searched only for words exactly matching your keywords. You can try to search <a href=\"$thisscript?subword=1&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">allowing subword matching</a>.</p>";
391 print "<p>".( $printed ? "Or you" : "You" )." can try a different search on the <a href=\"$SEARCHPAGE#search_packages\">Packages search page</a>.</p>";
# Result tables shared by the binary and the source output paths.
398 my (%pkgs, %sect, %part, %desc, %binaries);
# Binary package results: record for every package/suite/version the
# architectures, the subsection, the section (only when it is not 'main')
# and the description.
400 unless ($search_on_sources) {
402 my ($pkg_t, $suite, $arch, $section, $subsection,
403 $priority, $version, $desc) = @$_;
405 my ($package) = $pkg_t =~ m/^(.+)/; # untaint
406 $pkgs{$package}{$suite}{$version}{$arch} = 1;
407 $sect{$package}{$suite}{$version} = $subsection;
408 $part{$package}{$suite}{$version} = $section unless $section eq 'main';
410 $desc{$package}{$suite}{$version} = $desc;
# HTML output: print the paging header, then one heading per package that
# falls into the current page window.
414 if ($format eq 'html') {
415 my ($start, $end) = multipageheader( scalar keys %pkgs );
418 foreach my $pkg (sort keys %pkgs) {
420 next if $count < $start or $count > $end;
421 printf "<h3>Package %s</h3>\n", $pkg;
# Suites are listed in the order of @SUITES. The link, subsection and
# description shown come from the first element of the sorted version
# list.
423 foreach my $ver (@SUITES) {
424 if (exists $pkgs{$pkg}{$ver}) {
425 my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
427 if ($part{$pkg}{$ver}{$versions[0]}) {
428 $part_str = "[<span style=\"color:red\">$part{$pkg}{$ver}{$versions[0]}</span>]";
430 printf "<li><a href=\"$ROOT/%s/%s/%s\">%s</a> (%s): %s %s\n",
431 $ver, $sect{$pkg}{$ver}{$versions[0]}, $pkg, $ver, $sect{$pkg}{$ver}{$versions[0]}, $desc{$pkg}{$ver}{$versions[0]}, $part_str;
# One line per version with its sorted architectures.
433 foreach my $v (@versions) {
434 printf "<br>%s: %s\n",
435 $v, join (" ", (sort keys %{$pkgs{$pkg}{$ver}{$v}}) );
# XML output: emit one set of RDF triples per package/suite/version via
# RDF::Simple::Serialiser.
442 } elsif ($format eq 'xml') {
443 require RDF::Simple::Serialiser;
444 my $rdf = new RDF::Simple::Serialiser;
445 $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
447 foreach my $pkg (sort keys %pkgs) {
448 foreach my $ver (@SUITES) {
449 if (exists $pkgs{$pkg}{$ver}) {
450 my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
451 foreach my $version (@versions) {
452 my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version";
453 push @triples, [ $id, 'debpkg:package', $pkg ];
454 push @triples, [ $id, 'debpkg:version', $version ];
455 push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ];
456 push @triples, [ $id, 'debpkg:suite', $ver ];
457 push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ];
458 push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ];
459 foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) {
460 push @triples, [ $id, 'debpkg:architecture', $arch ];
467 print $rdf->serialise(@triples);
# Source package results: record for every package/suite the version, the
# subsection, the section (only when it is not 'main') and the sorted
# list of binary package names, split on commas.
471 my ($package, $suite, $section, $subsection, $priority,
472 $version, $binaries) = @$_;
474 $pkgs{$package}{$suite} = $version;
475 $sect{$package}{$suite}{source} = $subsection;
476 $part{$package}{$suite}{source} = $section unless $section eq 'main';
478 $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ];
# HTML output: print the paging header, then one heading per source
# package that falls into the current page window.
481 if ($format eq 'html') {
482 my ($start, $end) = multipageheader( scalar keys %pkgs );
485 foreach my $pkg (sort keys %pkgs) {
487 next if ($count < $start) or ($count > $end);
488 printf "<h3>Source package %s</h3>\n", $pkg;
# Suites are listed in the order of @SUITES.
490 foreach my $ver (@SUITES) {
491 if (exists $pkgs{$pkg}{$ver}) {
493 if ($part{$pkg}{$ver}{source}) {
494 $part_str = "[<span style=\"color:red\">$part{$pkg}{$ver}{source}</span>]";
496 printf "<li><a href=\"$ROOT/%s/source/%s\">%s</a> (%s): %s %s", $ver, $pkg, $ver, $sect{$pkg}{$ver}{source}, $pkgs{$pkg}{$ver}, $part_str;
# Links to the binary packages built from this source package.
498 print "<br>Binary packages: ";
500 foreach my $bp (@{$binaries{$pkg}{$ver}}) {
501 my $sect = 'section';
505 $bp_link = sprintf( "<a href=\"$ROOT/%s/%s/%s\">%s</a>",
506 $ver, $sect, uri_escape( $bp ), $bp );
510 push @bp_links, $bp_link;
512 print join( ", ", @bp_links );
# XML output: emit one set of RDF triples per source package/suite via
# RDF::Simple::Serialiser.
518 } elsif ($format eq 'xml') {
519 require RDF::Simple::Serialiser;
520 my $rdf = new RDF::Simple::Serialiser;
521 $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
523 foreach my $pkg (sort keys %pkgs) {
524 foreach my $ver (@SUITES) {
525 if (exists $pkgs{$pkg}{$ver}) {
526 my $id = "$ROOT/$ver/source/$pkg";
528 push @triples, [ $id, 'debpkg:package', $pkg ];
529 push @triples, [ $id, 'debpkg:type', 'source' ];
530 push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{source} ];
531 push @triples, [ $id, 'debpkg:version', $pkgs{$pkg}{$ver} ];
532 push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{source} || 'main' ];
534 foreach my $bp (@{$binaries{$pkg}{$ver}}) {
535 push @triples, [ $id, 'debpkg:binary', $bp ];
540 print $rdf->serialise(@triples);
# For HTML output, print the page index line after the results.
544 if ($format eq 'html') {
545 &printindexline( scalar keys %pkgs );
# Body of the index-line printer (its `sub` line is not visible here;
# presumably printindexline, confirm). Takes the number of results. When
# there are more results than fit on one page, the line is made of the
# previous-page link, the page index and the next-page link, joined by
# " | ", and printed as a centred paragraph.
552 my $no_results = shift;
555 if ($no_results > $results_per_page) {
557 $index_line = prevlink($input,\%params)." | ".
558 indexline( $input, \%params, $no_results)." | ".
559 nextlink($input,\%params, $no_results);
561 print "<p style=\"text-align:center\">$index_line</p>";
# Print the "Found N matching packages" header, the page index line and,
# for more than 100 results, the results-per-page selector.
#   $no_results  total number of matching packages
# Returns ( $start, $end ), the range of result numbers to display.
# Reads and may overwrite the file-level $results_per_page.
565 sub multipageheader {
566 my $no_results = shift;
# "all" results per page: the page size becomes the number of results.
569 if ($results_per_page =~ /^all$/i) {
572 $results_per_page = $no_results;
# The window comes from Packages::Search and is capped at the result
# count.
574 $start = Packages::Search::start( \%params );
575 $end = Packages::Search::end( \%params );
576 if ($end > $no_results) { $end = $no_results; }
579 print "<p>Found <em>$no_results</em> matching packages,";
580 if ($end == $start) {
581 print " displaying package $end.</p>";
583 print " displaying packages $start to $end.</p>";
586 printindexline( $no_results );
# Results-per-page selector: the current choice is shown as plain text,
# the other choices as links.
588 if ($no_results > 100) {
589 print "<p>Results per page: ";
592 if ($results_per_page == $_) {
593 push @resperpagelinks, $_;
595 push @resperpagelinks, resperpagelink($input,\%params,$_);
# The original request value is checked here because $results_per_page
# may have been overwritten above.
598 if ($params{values}{number}{final} =~ /^all$/i) {
599 push @resperpagelinks, "all";
601 push @resperpagelinks, resperpagelink($input, \%params,"all");
603 print join( " | ", @resperpagelinks )."</p>";
605 return ( $start, $end );
613 <p style="text-align:right;font-size:small;font-style:italic"><a href="$SEARCHPAGE">Packages search page</a></p>
618 my $pete = new Benchmark; # end-of-run timestamp
619 my $petd = timediff($pete, $pet0); # total time since $pet0 was taken
620 print "Total page evaluation took ".timestr($petd)."<br>"
622 print $input->end_html; # close the HTML document