# Copyright (C) 2000, 2001 Josip Rodin
# Copyright (C) 2001 Adam Heath
# Copyright (C) 2004 Martin Schulze
-# Copyright (C) 2004 Frank Lichtenheld
+# Copyright (C) 2004-2006 Frank Lichtenheld
#
# use is allowed under the terms of the GNU Public License (GPL)
# see http://www.fsf.org/copyleft/gpl.html for a copy of the license
-require 5.001;
use strict;
use CGI qw( -oldstyle_urls );
-#use CGI::Carp qw( fatalsToBrowser );
+use CGI::Carp qw( fatalsToBrowser );
use POSIX;
use URI::Escape;
use HTML::Entities;
use Packages::Search qw( :all );
use Packages::HTML ();
-my $thisscript = "search_packages.pl";
-my $use_grep = 1;
+my $thisscript = $Packages::HTML::SEARCH_CGI;
my $HOME = "http://www.debian.org";
my $ROOT = "";
my $SEARCHPAGE = "http://packages.debian.org/";
my @SUITES = qw( oldstable stable testing unstable experimental );
-my @DISTS = @SUITES;
my @SECTIONS = qw( main contrib non-free );
my @ARCHIVES = qw( us security installer );
my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
kfreebsd-i386 mips mipsel powerpc s390 sparc );
+my %SUITES = map { $_ => 1 } @SUITES;
+my %SECTIONS = map { $_ => 1 } @SECTIONS;
+my %ARCHIVES = map { $_ => 1 } @ARCHIVES;
+my %ARCHITECTURES = map { $_ => 1 } @ARCHITECTURES;
$ENV{PATH} = "/bin:/usr/bin";
# Read in all the variables set by the form
-my $input = new CGI;
+my $input;
+if ($ARGV[0] && ($ARGV[0] eq 'php')) {
+ $input = new CGI(\*STDIN);
+} else {
+ $input = new CGI;
+}
my $pet0 = new Benchmark;
# use this to disable debugging in production mode completly
-my $debug_allowed = 0;
+my $debug_allowed = 1;
my $debug = $debug_allowed && $input->param("debug");
+$debug = 0 if not defined($debug);
$Search::Param::debug = 1 if $debug > 1;
# If you want, just print out a list of all of the variables and exit.
# print $input->dump;
# exit;
+if (my $path = $input->param('path')) {
+ my @components = map { lc $_ } split /\//, $path;
+
+ foreach (@components) {
+ if ($SUITES{$_}) {
+ $input->param('suite', $_);
+ } elsif ($SECTIONS{$_}) {
+ $input->param('section', $_);
+ } elsif ($ARCHIVES{$_}) {
+ $input->param('archive', $_);
+ }elsif ($ARCHITECTURES{$_}) {
+ $input->param('arch', $_);
+ }
+ }
+}
+
my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' },
suite => { default => 'stable', match => '^(\w+)$',
alias => 'version', array => ',',
my @archs = @{$params{values}{arch}{final}};
my $page = $params{values}{page}{final};
my $results_per_page = $params{values}{number}{final};
+my %opts = ( case_bool => $case_bool, exact => $exact );
# for URL construction
my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
# read the configuration
my $topdir;
if (!open (C, "../config.sh")) {
- print "\nInternal Error: Cannot open configuration file.\n\n" if $format eq 'html';
+ print "\nInternal Error: Cannot open configuration file.\n\n"
+if $format eq 'html';
exit 0;
}
while (<C>) {
}
close (C);
-my $FLATDIR = $topdir . "/files/flat";
+my $DBDIR = $topdir . "/files/db";
my $search_on_sources = 0;
-my %descr;
-my %sections;
-
-sub find_desc
-{
- my $pkg = shift;
- my $suite = shift;
- my $part = shift;
- my $descr = '';
-
- unless (exists $descr{$suite}{$part}) {
- $descr{$suite}{$part} = {};
- tie %{$descr{$suite}{$part}}, 'DB_File', "$FLATDIR/$suite/$part/Description", O_RDONLY
- or return "Error while loading descriptions database: $!";
- }
-
- return $descr{$suite}{$part}{$pkg};
-}
-
-sub find_section
-{
- my $pkg = shift;
- my $suite = shift;
- my $part = shift;
- my $section = '';
-
- unless (exists $sections{$suite}{$part}) {
- $sections{$suite}{$part} = {};
- tie %{$sections{$suite}{$part}}, 'DB_File', "$FLATDIR/$suite/$part/Section", O_RDONLY
- or return undef;
- }
-
- return $sections{$suite}{$part}{$pkg};
-}
-
my $st0 = new Benchmark;
-tie my %cache, 'DB_File', "$topdir/files/search.cache/search.cache", O_RDWR|O_CREAT or $use_cache = 0;
-my $cached;
my @results;
-my $cache_key = $keyword.$exact.$subword.$searchon.$suites_param.$sections_param.$archs_param;
+my $too_many_hits;
if ($searchon eq 'sourcenames') {
$search_on_sources = 1;
}
-if ($use_cache && ($cached = $cache{$cache_key})) {
- @results = split /\n/, $cached;
- print "DEBUG: Used cached results<br><pre>$cached</pre>" if $debug;
-} else {
- my $searchkeyword = $keyword;
- my $grep_searchkeyword = $keyword;
- $searchkeyword =~ s/[.]/\\./;
- if (($searchon eq 'names') || ($searchon eq 'sourcenames')) {
- # asserting that all package names are lower case
- $searchkeyword = lc($searchkeyword) unless $case_bool;
- $case_bool = 1;
- $grep_searchkeyword = "^[^ ]*$searchkeyword" unless $exact;
- $searchkeyword = "^\\S*$searchkeyword" unless $exact;
- } else {
- $grep_searchkeyword = "\\(^$searchkeyword\\b\\|\\b$searchkeyword\\b\\)"
- if $subword != 1;
- $searchkeyword = "\\b$searchkeyword\\b"
- if $subword != 1;
+
+my %suites = map { $_ => 1 } @suites;
+my %sections = map { $_ => 1 } @sections;
+my %archs = map { $_ => 1 } @archs;
+
+print "DEBUG: suites=@suites, sections=@sections, archs=@archs<br>"
+ if $debug > 2;
+
+sub read_entry {
+ my ($hash, $key, $results) = @_;
+ my $result = $hash->{$key};
+ foreach (split /\000/, $result) {
+ my @data = split ( /\s/, $_, 7 );
+ print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
+ if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
+ && $sections{$data[2]}) {
+ print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
+ push @$results, [ $key, @data ];
+ }
+ }
+}
+sub read_src_entry {
+ my ($hash, $key, $results) = @_;
+ my $result = $hash->{$key};
+
+ foreach (split /\000/, $result) {
+ my @data = split ( /\s/, $_, 5 );
+ print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
+ if ($suites{$data[0]} && $sections{$data[1]}) {
+ print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
+ push @$results, [ $key, @data ];
+ }
}
+}
+sub do_names_search {
+ my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_;
+ my @results;
+
+ $keyword = lc $keyword unless $opts->{case_bool};
-# FIXME
-# check if the Packages files are there
-#my @files = glob ("$fdir/$file");
-#if ($#files == -1) {
-# XXX has to be updated for new architectures
-# if ($format eq 'html') {
-# if (($version eq "stable" and $arch =~ /^(hurd|sh)$/)
-# || ($version eq "oldstable" and $arch =~ /^amd64$/)) {
-# print "Error: the $arch architecture didn't exist in $version.<br>\n"
-# ."Please go back and choose a different distribution.\n";
-# } else {
-# print "Error: Packages/Sources file not found.<br>\n"
-# ."If the problem persists, please inform $ENV{SERVER_ADMIN}.\n";
-# printf "<p>$file</p>";
-# }
-# &printfooter;
-# }
-# exit;
-#}
-
- my @files;
- foreach my $s (@suites) {
- foreach my $sec (@sections) {
- foreach my $a (@archs) {
- foreach my $archive (@ARCHIVES) {
- if (($searchon eq 'names' or $searchon eq 'sourcenames')
- and $exact) {
- my ( %packages, $file );
- if ($search_on_sources) {
- $file = "$FLATDIR/$s/$sec/Sources.$archive.db";
- } else {
- $file = "$FLATDIR/$s/$sec/Packages-$a.$archive.db";
- }
- if (-f $file) {
- print "DEBUG: Use file $file<br>"
- if $debug > 1;
-
- tie %packages, 'DB_File', $file, O_RDONLY
- or die "Couldn't open packages file $file: $!";
-
- if (my $data = $packages{$searchkeyword}) {
- print "DEBUG: Found result $data<br>"
- if $debug > 1;
- push @results, "$file:$data";
- }
- }
- } else {
- my $file;
- if ($search_on_sources) {
- $file = "$FLATDIR/$s/$sec/Sources.$archive";
- } else {
- $file = "$FLATDIR/$s/$sec/Packages-$a.$archive";
- }
- if (-f $file) {
- print "DEBUG: Use file $file<br>"
- if $debug > 1;
-
- # use_grep is currently way faster, though
- # I can't pinpoint exactly why, yet
- # most probably the perl regexes are
- # slow compared to the simpler grep
- # regexes
- unless ($use_grep) {
- open my $pkg_fh, '<', $file
- or die "Couldn't open packages file $file: $!";
-
- foreach (<$pkg_fh>) {
- if (/$searchkeyword/o) {
- print "DEBUG: Found result $_<br>"
- if $debug > 1;
-
- push @results, "$file:$_";
- }
- }
- } else {
- push @files, $file;
- }
- }
- }
+ my $obj = tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE
+ or die "couldn't tie DB $DBDIR/$file: $!";
+
+ if ($opts->{exact}) {
+ &$read_entry( \%packages, $keyword, \@results );
+ } else {
+ my ($key, $prefixes) = ($keyword, '');
+ my %pkgs;
+ my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE
+ or die "couldn't tie postfix db $DBDIR/$postfix_file: $!";
+ $p_obj->seq( $key, $prefixes, R_CURSOR );
+ while (index($key, $keyword) >= 0) {
+ if ($prefixes =~ /^\001(\d+)/o) {
+ $too_many_hits += $1;
+ } else {
+ foreach (split /\000/o, $prefixes) {
+ $_ = '' if $_ eq '^';
+ print "DEBUG: add word $_$key<br>" if $debug > 2;
+ $pkgs{$_.$key}++;
}
}
+ last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0;
+ last if $too_many_hits or keys %pkgs >= 100;
+ }
+
+ my $no_results = keys %pkgs;
+ if ($too_many_hits || ($no_results >= 100)) {
+ $too_many_hits += $no_results;
+ %pkgs = ( $keyword => 1 );
+ }
+ foreach my $pkg (sort keys %pkgs) {
+ &$read_entry( \%packages, $pkg, \@results );
+ }
+ }
+ return \@results;
+}
+sub do_fulltext_search {
+ my ($keword, $file, $mapping, $lookup, $read_entry, $opts) = @_;
+ my @results;
+
+ my @lines;
+ my $regex;
+ if ($opts->{case_bool}) {
+ if ($opts->{exact}) {
+ $regex = qr/\b\Q$keyword\E\b/o;
+ } else {
+ $regex = qr/\Q$keyword\E/o;
+ }
+ } else {
+ if ($exact) {
+ $regex = qr/\b\Q$keyword\E\b/io;
+ } else {
+ $regex = qr/\Q$keyword\E/io;
}
}
- if ($use_grep) {
- if (@files) {
- my @grep = ( 'grep', '-H' );
- push @grep, '-i' unless $case_bool;
- push @grep, $grep_searchkeyword;
- push @grep, @files;
-
- print "DEBUG: starting grep command '".
- substr("@grep",0,100)."[...]'<br>" if $debug;
- open my $grep_out, '-|', @grep or
- die "grep failed: $!";
- @results = <$grep_out>;
+ open DESC, '<', "$DBDIR/$file"
+ or die "couldn't open $DBDIR/$file: $!";
+ while (<DESC>) {
+ $_ =~ $regex or next;
+ print "DEBUG: Matched line $.<br>" if $debug > 2;
+ push @lines, $.;
+ }
+ close DESC;
+
+ tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE
+ or die "couldn't tie DB $DBDIR/$lookup: $!";
+ tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE
+ or die "couldn't tie DB $DBDIR/$mapping: $!";
+
+ my %tmp_results;
+ foreach my $l (@lines) {
+ my $result = $did2pkg{$l};
+ foreach (split /\000/o, $result) {
+ my @data = split /\s/, $_, 3;
+ next unless $archs{$data[2]};
+ $tmp_results{$data[0]}++;
}
}
-
- $cache{$cache_key} = join "", @results;
+ foreach my $pkg (keys %tmp_results) {
+ &$read_entry( \%packages, $pkg, \@results );
+ }
+ return \@results;
+}
+
+if ($searchon eq 'names') {
+ push @results, @{ do_names_search( $keyword, 'packages_small.db',
+ 'package_postfixes.db',
+ \&read_entry, \%opts ) };
+} elsif ($searchon eq 'sourcenames') {
+ push @results, @{ do_names_search( $keyword, 'sources_small.db',
+ 'source_postfixes.db',
+ \&read_src_entry, \%opts ) };
+} else {
+ push @results, @{ do_names_search( $keyword, 'packages_small.db',
+ 'package_postfixes.db',
+ \&read_entry, \%opts ) };
+ push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt',
+ 'descriptions_packages.db',
+ 'packages_small.db',
+ \&read_entry, \%opts ) };
}
my $st1 = new Benchmark;
}
}
+if ($too_many_hits) {
+print "<p><strong>Your search was too wide so we will only display exact matches. At least <em>$too_many_hits</em> results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.</strong></p>";
+}
+
if (!@results) {
if ($format eq 'html') {
my $keyword_esc = uri_escape( $keyword );
}
my (%pkgs, %sect, %part, %desc, %binaries);
-my (@colon, $package, $pkg_t, $section, $ver, $arch, $foo, $binaries);
unless ($search_on_sources) {
- foreach my $line (@results) {
- @colon = split (/:/, $line);
- ($pkg_t, $section, $ver, $arch, $foo) = split (/ /, $#colon >1 ? $colon[1].":".$colon[2]:$colon[1], 5);
- $section =~ s,^(non-free|contrib)/,,;
- $section =~ s,^non-US.*$,non-US,,;
- my ($dist,$part,undef) = $colon[0] =~ m,.*/([^/]+)/([^/]+)/Packages-([^\.]+)\.,; #$1=stable, $2=main, $3=alpha
-
- ($package) = $pkg_t =~ m/^(.+)/; # untaint
- $pkgs{$package}{$dist}{$ver}{$arch} = 1;
- $sect{$package}{$dist}{$ver} = $section;
- $part{$package}{$dist}{$ver} = $part unless $part eq 'main';
-
- $desc{$package}{$dist}{$ver} = find_desc ($package, $dist, $part) if (! exists $desc{$package}{$dist}{$ver});
+ foreach (@results) {
+ my ($pkg_t, $suite, $arch, $section, $subsection,
+ $priority, $version, $desc) = @$_;
+
+ my ($package) = $pkg_t =~ m/^(.+)/; # untaint
+ $pkgs{$package}{$suite}{$version}{$arch} = 1;
+ $sect{$package}{$suite}{$version} = $subsection;
+ $part{$package}{$suite}{$version} = $section unless $section eq 'main';
+
+ $desc{$package}{$suite}{$version} = $desc;
}
if ($format eq 'html') {
my ($start, $end) = multipageheader( scalar keys %pkgs );
my $count = 0;
-
+
foreach my $pkg (sort keys %pkgs) {
$count++;
next if $count < $start or $count > $end;
printf "<h3>Package %s</h3>\n", $pkg;
print "<ul>\n";
- foreach $ver (@SUITES) {
+ foreach my $ver (@SUITES) {
if (exists $pkgs{$pkg}{$ver}) {
my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
my $part_str = "";
$rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
my @triples;
foreach my $pkg (sort keys %pkgs) {
- foreach $ver (@DISTS) {
+ foreach my $ver (@SUITES) {
if (exists $pkgs{$pkg}{$ver}) {
my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
foreach my $version (@versions) {
print $rdf->serialise(@triples);
}
} else {
- foreach my $line (@results) {
- chomp($line);
- @colon = split (/:/, $line);
- ($package, $section, $ver, $binaries) = split (/ /, $#colon >1 ? $colon[1].":".$colon[2]:$colon[1], 4);
- $section =~ s,^(non-free|contrib)/,,;
- $section =~ s,^non-US.*$,non-US,,;
- $colon[0] =~ m,.*/([^/]+)/([^/]+)/Sources\.,; #$1=stable, $2=main
+ foreach (@results) {
+ my ($package, $suite, $section, $subsection, $priority,
+ $version, $binaries) = @$_;
- my ($suite, $part) = ($1, $2);
- $pkgs{$package}{$suite} = $ver;
- $sect{$package}{$suite}{source} = $section;
- $part{$package}{$suite}{source} = $part unless $part eq 'main';
+ $pkgs{$package}{$suite} = $version;
+ $sect{$package}{$suite}{source} = $subsection;
+ $part{$package}{$suite}{source} = $section unless $section eq 'main';
$binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ];
-
}
if ($format eq 'html') {
next if ($count < $start) or ($count > $end);
printf "<h3>Source package %s</h3>\n", $pkg;
print "<ul>\n";
- foreach $ver (@DISTS) {
+ foreach my $ver (@SUITES) {
if (exists $pkgs{$pkg}{$ver}) {
my $part_str = "";
if ($part{$pkg}{$ver}{source}) {
print "<br>Binary packages: ";
my @bp_links;
foreach my $bp (@{$binaries{$pkg}{$ver}}) {
- my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || '';
- $sect =~ s,^(non-free|contrib)/,,;
- $sect =~ s,^non-US.*$,non-US,,;
+ my $sect = 'section';
+
my $bp_link;
if ($sect) {
- $bp_link = sprintf "<a href=\"$ROOT/%s/%s/%s\">%s</a>", $ver, $sect, uri_escape( $bp ), $bp;
+ $bp_link = sprintf( "<a href=\"$ROOT/%s/%s/%s\">%s</a>",
+ $ver, $sect, uri_escape( $bp ), $bp );
} else {
$bp_link = $bp;
}
$rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
my @triples;
foreach my $pkg (sort keys %pkgs) {
- foreach $ver (@DISTS) {
+ foreach my $ver (@SUITES) {
if (exists $pkgs{$pkg}{$ver}) {
my $id = "$ROOT/$ver/source/$pkg";
my $index_line;
if ($no_results > $results_per_page) {
- $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results);
+ $index_line = prevlink($input,\%params)." | ".
+ indexline( $input, \%params, $no_results)." | ".
+ nextlink($input,\%params, $no_results);
print "<p style=\"text-align:center\">$index_line</p>";
}
</div>
END
+my $pete = new Benchmark;
+my $petd = timediff($pete, $pet0);
+print "Total page evaluation took ".timestr($petd)."<br>"
+ if $debug_allowed;
print $input->end_html;
}
+
+# vim: ts=8 sw=4