3 # search_packages.pl -- CGI interface to the Packages files on packages.debian.org
5 # Copyright (C) 1998 James Treacy
6 # Copyright (C) 2000, 2001 Josip Rodin
7 # Copyright (C) 2001 Adam Heath
8 # Copyright (C) 2004 Martin Schulze
9 # Copyright (C) 2004-2006 Frank Lichtenheld
11 # use is allowed under the terms of the GNU Public License (GPL)
12 # see http://www.fsf.org/copyleft/gpl.html for a copy of the license
15 use CGI qw( -oldstyle_urls );
16 use CGI::Carp qw( fatalsToBrowser );
26 use Packages::Search qw( :all );
27 use Packages::HTML ();
# Script name and base URLs. $thisscript and $SEARCHPAGE are interpolated
# into links printed further down.
29 my $thisscript = "search_packages.pl";
30 my $HOME = "http://www.debian.org";
32 my $SEARCHPAGE = "http://packages.debian.org/";
# Known suites, sections, archives and architectures. These lists are also
# referenced from the 'replace' entries of the parameter definitions.
33 my @SUITES = qw( oldstable stable testing unstable experimental );
35 my @SECTIONS = qw( main contrib non-free );
36 my @ARCHIVES = qw( us security installer );
37 my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
38 kfreebsd-i386 mips mipsel powerpc s390 sparc );
# Lookup hashes (name => 1) built from the lists above for membership tests.
39 my %SUITES = map { $_ => 1 } @SUITES;
40 my %SECTIONS = map { $_ => 1 } @SECTIONS;
41 my %ARCHIVES = map { $_ => 1 } @ARCHIVES;
42 my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES;
# Pin PATH to a fixed pair of system directories.
44 $ENV{PATH} = "/bin:/usr/bin";
46 # Read in all the variables set by the form
# NOTE(review): $input is used below but its creation is not visible in this
# excerpt; presumably it is the CGI query object -- confirm.
# Start timestamp for the parameter-evaluation timing printed in debug mode.
49 my $pet0 = new Benchmark;
50 # use this to disable debugging in production mode completely
51 my $debug_allowed = 1;
# Debugging is active only when allowed above and requested through the
# "debug" request parameter; its value doubles as the verbosity level.
52 my $debug = $debug_allowed && $input->param("debug");
# Levels above 1 additionally switch on $Search::Param::debug.
53 $Search::Param::debug = 1 if $debug > 1;
55 # If you want, just print out a list of all of the variables and exit.
# In debug mode the HTTP header is printed immediately.
56 print $input->header if $debug;
# Optional "path" request parameter: it is split on "/" and every component
# is lower-cased. A component that names a known section, archive or
# architecture is stored in the corresponding request parameter.
# NOTE(review): the condition guarding the 'suite' assignment is not visible
# in this excerpt; presumably it tests %SUITES -- confirm.
60 if (my $path = $input->param('path')) {
61 my @components = map { lc $_ } split /\//, $path;
63 foreach (@components) {
65 $input->param('suite', $_);
66 } elsif ($SECTIONS{$_}) {
67 $input->param('section', $_);
68 } elsif ($ARCHIVES{$_}) {
69 $input->param('archive', $_);
70 }elsif ($ARCHITECTURES{$_}) {
71 $input->param('arch', $_);
# Definitions of the accepted request parameters, handed to
# Packages::Search::parse_params below. Every entry carries a 'default' and a
# 'match' pattern; list-like parameters also carry an 'array' separator and a
# 'replace' map from a keyword ('all' / 'any') to one of the lists declared
# at the top of the script. Some entries declare an 'alias' name.
# NOTE(review): the exact way parse_params interprets these keys is not
# visible here -- confirm against Packages::Search.
# The 'arch' pattern accepts "-" (like 'section' does) because
# @ARCHITECTURES contains hyphenated names such as hurd-i386 and
# kfreebsd-i386, which a plain \w+ pattern cannot match.
76 my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' },
77 suite => { default => 'stable', match => '^(\w+)$',
78 alias => 'version', array => ',',
79 replace => { all => \@SUITES } },
80 case => { default => 'insensitive', match => '^(\w+)$' },
81 official => { default => 0, match => '^(\w+)$' },
82 use_cache => { default => 1, match => '^(\w+)$' },
83 subword => { default => 0, match => '^(\w+)$' },
84 exact => { default => undef, match => '^(\w+)$' },
85 searchon => { default => 'all', match => '^(\w+)$' },
86 section => { default => 'all', match => '^([\w-]+)$',
87 alias => 'release', array => ',',
88 replace => { all => \@SECTIONS } },
89 arch => { default => 'any', match => '^([\w-]+)$',
90 array => ',', replace =>
91 { any => \@ARCHITECTURES } },
92 archive => { default => 'all', match => '^(\w+)$',
93 array => ',', replace =>
94 { all => \@ARCHIVES } },
95 format => { default => 'html', match => '^(\w+)$' },
97 my %params = Packages::Search::parse_params( $input, \%params_def );
# Output format chosen by the request ('html' is the declared default).
99 my $format = $params{values}{format}{final};
100 #XXX: Don't use alternative output formats yet
# Print the HTTP header for the chosen format. The XML variant is currently
# sent as text/plain; the RDF content type is commented out.
103 if ($format eq 'html') {
104 print $input->header;
105 } elsif ($format eq 'xml') {
106 # print $input->header( -type=>'application/rdf+xml' );
107 print $input->header( -type=>'text/plain' );
# If parse_params recorded an error for 'keywords', an error message is
# printed (HTML output only).
110 if ($params{errors}{keywords}) {
111 print "Error: keyword not valid or missing" if $format eq 'html';
# Unpack the parsed parameter values ('final' entries) into plain variables.
114 my $keyword = $params{values}{keywords}{final};
115 my @suites = @{$params{values}{suite}{final}};
116 my $official = $params{values}{official}{final};
117 my $use_cache = $params{values}{use_cache}{final};
118 my $case = $params{values}{case}{final};
# True when the case setting does not contain the string "insensitive".
119 my $case_bool = ( $case !~ /insensitive/ );
120 my $subword = $params{values}{subword}{final};
121 my $exact = $params{values}{exact}{final};
# Without an explicit 'exact' value, exact matching is the negation of 'subword'.
122 $exact = !$subword unless defined $exact;
123 my $searchon = $params{values}{searchon}{final};
124 my @sections = @{$params{values}{section}{final}};
125 my @archs = @{$params{values}{arch}{final}};
# NOTE(review): 'page' and 'number' have no entry in the visible part of
# %params_def; presumably parse_params supplies them -- confirm.
126 my $page = $params{values}{page}{final};
127 my $results_per_page = $params{values}{number}{final};
129 # for URL construction
# Comma-joined 'no_replace' values, used in the query strings of links.
130 my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
131 my $sections_param = join ',', @{$params{values}{section}{no_replace}};
132 my $archs_param = join ',', @{$params{values}{arch}{no_replace}};
# Entity-encoded copies for embedding in the HTML text.
135 my $keyword_enc = encode_entities $keyword;
136 my $searchon_enc = encode_entities $searchon;
137 my $suites_enc = encode_entities join ', ', @{$params{values}{suite}{no_replace}};
138 my $sections_enc = encode_entities join ', ', @{$params{values}{section}{no_replace}};
139 my $archs_enc = encode_entities join ', ', @{$params{values}{arch}{no_replace}};
# End of parameter evaluation; the elapsed time is reported in debug mode.
140 my $pet1 = new Benchmark;
141 my $petd = timediff($pet1, $pet0);
142 print "DEBUG: Parameter evaluation took ".timestr($petd)."<br>" if $debug;
# For HTML output, print the page header produced by Packages::HTML::header,
# passing the current search values along (presumably so the search form on
# the page can be pre-filled -- confirm against Packages::HTML).
144 if ($format eq 'html') {
145 print Packages::HTML::header( title => 'Package Search Results' ,
147 title_tag => 'Debian Package Search Results',
148 print_title_above => 1,
149 print_search_field => 'packages',
150 search_field_values => {
151 keywords => $keyword_enc,
152 searchon => $searchon,
154 suite => $suites_enc,
155 section => $sections_enc,
163 # read the configuration
# Open ../config.sh; if that fails an error message is printed (HTML only).
165 if (!open (C, "../config.sh")) {
166 print "\nInternal Error: Cannot open configuration file.\n\n"
167 if $format eq 'html';
# Accept both topdir=/path and topdir="/path". The capture is non-greedy so
# that an optional closing quote (and trailing whitespace) is not swallowed
# into the value.
171 $topdir = $1 if (/^\s*topdir="?(.*?)"?\s*$/);
# Directory holding the package databases, relative to the configured topdir.
175 my $DBDIR = $topdir . "/files/db";
# Flag: set to 1 below when the search is on source package names.
176 my $search_on_sources = 0;
# Start timestamp for the search timing printed in debug mode.
178 my $st0 = new Benchmark;
180 if ($searchon eq 'sourcenames') {
181 $search_on_sources = 1;
# Lookup hashes of the requested suites, sections and architectures.
184 my %suites = map { $_ => 1 } @suites;
185 my %sections = map { $_ => 1 } @sections;
186 my %archs = map { $_ => 1 } @archs;
188 print "DEBUG: suites=@suites, sections=@sections, archs=@archs<br>" if $debug > 2;
190 if ($searchon eq 'names') {
# Lower-case the keyword unless the search is case-sensitive.
192 $keyword = lc $keyword unless $case_bool;
# Tie the package database read-only; the script dies if that fails.
194 my $obj = tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE
195 or die "couldn't tie DB $DBDIR/packages_small.db: $!";
# Direct lookup of the keyword. The stored value is split on NUL bytes into
# entries, and every entry on whitespace into at most six fields.
# NOTE(review): the condition selecting this branch over the scan further
# down is not visible here; presumably it depends on $exact -- confirm.
198 my $result = $packages{$keyword};
199 foreach (split /\000/, $result) {
200 my @data = split ( /\s/, $_, 6 );
201 #FIXME, should be done on db generation
# Cut the third field at its first "/".
202 if ($data[2] =~ m,/,) {
203 $data[2] =~ s,/.*$,,;
207 print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
# Keep the entry when its suite and section were requested and its
# architecture was requested or is 'all'.
208 if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
209 && $sections{$data[2]}) {
210 print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
211 push @results, [ $keyword, @data ];
# Scan all keys of the database and keep those containing the keyword as a
# substring. In the visible lines the $result returned by each() is not
# used; the value is fetched again through $packages{$pkg}.
215 while (my ($pkg, $result) = each %packages) {
216 #what's faster? I can't really see a difference
217 (index($pkg, $keyword) >= 0) or next;
218 #$pkg =~ /\Q$keyword\E/ or next;
219 foreach (split /\000/, $packages{$pkg}) {
220 my @data = split ( /\s/, $_, 6 );
221 #FIXME, should be done on db generation
222 if ($data[2] =~ m,/,) {
223 $data[2] =~ s,/.*$,,;
227 print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
228 if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
229 && $sections{$data[2]}) {
230 print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
231 push @results, [ $pkg , @data ];
# End of the search; the elapsed time is reported in debug mode.
238 my $st1 = new Benchmark;
239 my $std = timediff($st1, $st0);
240 print "DEBUG: Search took ".timestr($std)."<br>" if $debug;
# For HTML output, print a sentence describing the search that was run:
# which suites, sections and architectures were covered and how the keyword
# was matched. The literal values "all" / "any" are worded as "all ...".
242 if ($format eq 'html') {
243 my $suite_wording = $suites_enc eq "all" ? "all suites"
244 : "suite(s) <em>$suites_enc</em>";
245 my $section_wording = $sections_enc eq 'all' ? "all sections"
246 : "section(s) <em>$sections_enc</em>";
247 my $arch_wording = $archs_enc eq 'any' ? "all architectures"
248 : "architecture(s) <em>$archs_enc</em>";
# Name searches (binary or source) get their own sentence.
249 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
250 my $source_wording = $search_on_sources ? "source " : "";
251 my $exact_wording = $exact ? "named" : "that names contain";
252 print "<p>You have searched for ${source_wording}packages $exact_wording <em>$keyword_enc</em> in $suite_wording, $section_wording, and $arch_wording.</p>";
# Wording for the remaining search modes (names and descriptions).
254 my $exact_wording = $exact ? "" : " (including subword matching)";
255 print "<p>You have searched for <em>$keyword_enc</em> in packages names and descriptions in $suite_wording, $section_wording, and $arch_wording$exact_wording.</p>";
# HTML messages for an unsuccessful search, with links that retry the query
# in a less strict mode.
# NOTE(review): the condition enclosing this section is not visible in this
# excerpt; presumably it tests for an empty result list -- confirm.
260 if ($format eq 'html') {
# URI-escaped keyword for use in the query strings of the retry links.
261 my $keyword_esc = uri_escape( $keyword );
263 if (($searchon eq "names") || ($searchon eq 'sourcenames')) {
264 if (($suites_enc eq 'all')
265 && ($archs_enc eq 'any')
266 && ($sections_enc eq 'all')) {
267 print "<p><strong>Can't find that package.</strong></p>\n";
269 print "<p><strong>Can't find that package, at least not in that suite ".
270 ( $search_on_sources ? "" : " and on that architecture" ).
# Link that repeats the search with exact=0.
276 print "<p>You have searched only for exact matches of the package name. You can try to search for <a href=\"$thisscript?exact=0&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">package names that contain your search string</a>.</p>";
279 if (($suites_enc eq 'all')
280 && ($archs_enc eq 'any')
281 && ($sections_enc eq 'all')) {
282 print "<p><strong>Can't find that string.</strong></p>\n";
284 print "<p><strong>Can't find that string, at least not in that suite ($suites_enc, section $sections_enc) and on that architecture ($archs_enc).</strong></p>\n";
# Link that repeats the search with subword=1.
289 print "<p>You have searched only for words exactly matching your keywords. You can try to search <a href=\"$thisscript?subword=1&searchon=$searchon&suite=$suites_param&case=$case&section=$sections_param&keywords=$keyword_esc&arch=$archs_param\">allowing subword matching</a>.</p>";
# $printed selects between "Or you" and "You"; it is set outside the visible lines.
292 print "<p>".( $printed ? "Or you" : "You" )." can try a different search on the <a href=\"$SEARCHPAGE#search_packages\">Packages search page</a>.</p>";
# Result tables. For binary packages they are filled as follows:
#   $pkgs{package}{suite}{version}{arch} = 1
#   $sect{package}{suite}{version}       = the literal string 'subsection'
#   $part{package}{suite}{version}       = section, only when it is not 'main'
#   $desc{package}{suite}{version}       = description
299 my (%pkgs, %sect, %part, %desc, %binaries);
301 unless ($search_on_sources) {
# Each result row is unpacked into its seven fields.
303 my ($pkg_t, $suite, $arch, $section, $priority, $version, $desc) = @$_;
305 my ($package) = $pkg_t =~ m/^(.+)/; # untaint
306 $pkgs{$package}{$suite}{$version}{$arch} = 1;
307 $sect{$package}{$suite}{$version} = 'subsection';
308 $part{$package}{$suite}{$version} = $section unless $section eq 'main';
310 $desc{$package}{$suite}{$version} = $desc;
# HTML listing: one heading per package, restricted to the range returned by
# multipageheader. $count is maintained in lines not visible here.
314 if ($format eq 'html') {
315 my ($start, $end) = multipageheader( scalar keys %pkgs );
318 foreach my $pkg (sort keys %pkgs) {
320 next if $count < $start or $count > $end;
321 printf "<h3>Package %s</h3>\n", $pkg;
# Suites are walked in the order given by @SUITES.
323 foreach my $ver (@SUITES) {
324 if (exists $pkgs{$pkg}{$ver}) {
325 my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
# A section other than main is shown in red, using the first sorted version.
327 if ($part{$pkg}{$ver}{$versions[0]}) {
328 $part_str = "[<span style=\"color:red\">$part{$pkg}{$ver}{$versions[0]}</span>]";
# NOTE(review): $ROOT is not declared in the visible lines -- confirm where it is set.
330 printf "<li><a href=\"$ROOT/%s/%s/%s\">%s</a> (%s): %s %s\n",
331 $ver, $sect{$pkg}{$ver}{$versions[0]}, $pkg, $ver, $sect{$pkg}{$ver}{$versions[0]}, $desc{$pkg}{$ver}{$versions[0]}, $part_str;
# One line per version listing the architectures it is available for.
333 foreach my $v (@versions) {
334 printf "<br>%s: %s\n",
335 $v, join (" ", (sort keys %{$pkgs{$pkg}{$ver}{$v}}) );
# XML listing: collect triples per package/suite/version and serialise them
# with RDF::Simple::Serialiser, which is loaded only when needed.
342 } elsif ($format eq 'xml') {
343 require RDF::Simple::Serialiser;
344 my $rdf = new RDF::Simple::Serialiser;
345 $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
347 foreach my $pkg (sort keys %pkgs) {
# NOTE(review): @DISTS is not declared in the visible lines, while every other
# suite loop in this script iterates over @SUITES -- confirm this is intended.
348 foreach my $ver (@DISTS) {
349 if (exists $pkgs{$pkg}{$ver}) {
350 my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
351 foreach my $version (@versions) {
352 my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version";
353 push @triples, [ $id, 'debpkg:package', $pkg ];
354 push @triples, [ $id, 'debpkg:version', $version ];
355 push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ];
356 push @triples, [ $id, 'debpkg:suite', $ver ];
357 push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ];
358 push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ];
359 foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) {
360 push @triples, [ $id, 'debpkg:architecture', $arch ];
367 print $rdf->serialise(@triples);
# Source package results. The tables are filled as follows:
#   $pkgs{package}{suite}         = version
#   $sect{package}{suite}{source} = the literal string 'subsection'
#   $part{package}{suite}{source} = section, only when it is not 'main'
#   $binaries{package}{suite}     = sorted list of binary package names
# The assignment of the five variables below is not visible in this excerpt.
371 my ($package, $suite, $section, $version, $binaries);
373 $pkgs{$package}{$suite} = $version;
374 $sect{$package}{$suite}{source} = 'subsection';
375 $part{$package}{$suite}{source} = $section unless $section eq 'main';
# The binaries string is split on commas with optional surrounding whitespace.
377 $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ];
# HTML listing, restricted to the range returned by multipageheader.
# $count is maintained in lines not visible here.
381 if ($format eq 'html') {
382 my ($start, $end) = multipageheader( scalar keys %pkgs );
385 foreach my $pkg (sort keys %pkgs) {
387 next if ($count < $start) or ($count > $end);
388 printf "<h3>Source package %s</h3>\n", $pkg;
390 foreach my $ver (@SUITES) {
391 if (exists $pkgs{$pkg}{$ver}) {
# A section other than main is shown in red.
393 if ($part{$pkg}{$ver}{source}) {
394 $part_str = "[<span style=\"color:red\">$part{$pkg}{$ver}{source}</span>]";
396 printf "<li><a href=\"$ROOT/%s/source/%s\">%s</a> (%s): %s %s", $ver, $pkg, $ver, $sect{$pkg}{$ver}{source}, $pkgs{$pkg}{$ver}, $part_str;
398 print "<br>Binary packages: ";
# Build one link per binary package. The section comes from find_section
# (defined outside the visible lines), falling back to the empty string.
400 foreach my $bp (@{$binaries{$pkg}{$ver}}) {
401 my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || '';
# Drop a leading "non-free/" or "contrib/", and reduce anything starting with
# "non-US" to just "non-US".
402 $sect =~ s,^(non-free|contrib)/,,;
403 $sect =~ s,^non-US.*$,non-US,,;
406 $bp_link = sprintf "<a href=\"$ROOT/%s/%s/%s\">%s</a>", $ver, $sect, uri_escape( $bp ), $bp;
410 push @bp_links, $bp_link;
412 print join( ", ", @bp_links );
# XML listing: collect triples per source package and suite and serialise
# them with RDF::Simple::Serialiser, which is loaded only when needed.
418 } elsif ($format eq 'xml') {
419 require RDF::Simple::Serialiser;
420 my $rdf = new RDF::Simple::Serialiser;
421 $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
423 foreach my $pkg (sort keys %pkgs) {
424 foreach my $ver (@SUITES) {
425 if (exists $pkgs{$pkg}{$ver}) {
426 my $id = "$ROOT/$ver/source/$pkg";
428 push @triples, [ $id, 'debpkg:package', $pkg ];
429 push @triples, [ $id, 'debpkg:type', 'source' ];
430 push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{source} ];
431 push @triples, [ $id, 'debpkg:version', $pkgs{$pkg}{$ver} ];
432 push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{source} || 'main' ];
434 foreach my $bp (@{$binaries{$pkg}{$ver}}) {
435 push @triples, [ $id, 'debpkg:binary', $bp ];
440 print $rdf->serialise(@triples);
# For HTML output, print the page index line once more after the results,
# passing the number of distinct packages found.
444 if ($format eq 'html') {
445 &printindexline( scalar keys %pkgs );
# NOTE(review): these lines appear to be the body of printindexline (called
# elsewhere in this script with a result count); the sub header itself is
# not visible in this excerpt -- confirm.
# Takes the number of results as its only argument. When that exceeds the
# current results-per-page value, a centred paragraph with previous-page
# link, page index and next-page link is printed.
452 my $no_results = shift;
455 if ($no_results > $results_per_page) {
457 $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results);
459 print "<p style=\"text-align:center\">$index_line</p>";
# multipageheader( $no_results )
#
# Prints the "Found N matching packages" summary, the page index line and,
# when there are more than 100 results, the list of results-per-page links.
# Returns the list ( $start, $end ) that callers use to select which result
# positions to display.
#
# Side effect: when the results-per-page setting is "all", the file-level
# $results_per_page is overwritten with $no_results.
463 sub multipageheader {
464 my $no_results = shift;
467 if ($results_per_page =~ /^all$/i) {
470 $results_per_page = $no_results;
# Range as computed by Packages::Search, with $end capped at the result count.
472 $start = Packages::Search::start( \%params );
473 $end = Packages::Search::end( \%params );
474 if ($end > $no_results) { $end = $no_results; }
477 print "<p>Found <em>$no_results</em> matching packages,";
# A range of exactly one package gets singular wording.
478 if ($end == $start) {
479 print " displaying package $end.</p>";
481 print " displaying packages $start to $end.</p>";
484 printindexline( $no_results );
486 if ($no_results > 100) {
487 print "<p>Results per page: ";
# The current page size is shown as plain text, other sizes as links. The
# list of sizes being iterated is not visible in this excerpt.
490 if ($results_per_page == $_) {
491 push @resperpagelinks, $_;
493 push @resperpagelinks, resperpagelink($input,\%params,$_);
# The "all" entry is tested against the original request value because
# $results_per_page may have been overwritten above.
496 if ($params{values}{number}{final} =~ /^all$/i) {
497 push @resperpagelinks, "all";
499 push @resperpagelinks, resperpagelink($input, \%params,"all");
501 print join( " | ", @resperpagelinks )."</p>";
503 return ( $start, $end );
511 <p style="text-align:right;font-size:small;font-style:italic"><a href="$SEARCHPAGE">Packages search page</a></p>
516 print $input->end_html;