use Packages::Search qw( :all );
use Packages::HTML ();
-my $thisscript = "search_packages.pl";
+my $thisscript = $Packages::HTML::SEARCH_CGI;
my $HOME = "http://www.debian.org";
my $ROOT = "";
my $SEARCHPAGE = "http://packages.debian.org/";
my @SUITES = qw( oldstable stable testing unstable experimental );
-my @DISTS = @SUITES;
my @SECTIONS = qw( main contrib non-free );
my @ARCHIVES = qw( us security installer );
my @ARCHITECTURES = qw( alpha amd64 arm hppa hurd-i386 i386 ia64
$ENV{PATH} = "/bin:/usr/bin";
# Read in all the variables set by the form
-my $input = new CGI;
+my $input;
+if ($ARGV[0] && ($ARGV[0] eq 'php')) {
+ $input = new CGI(\*STDIN);
+} else {
+ $input = new CGI;
+}
my $pet0 = new Benchmark;
# Use this to disable debugging completely in production mode.
my $debug_allowed = 1;
my $debug = $debug_allowed && $input->param("debug");
-$Search::Param::debug = 1 if $debug > 1;
+$debug = 0 if not defined($debug);
+$Packages::Search::debug = 1 if $debug > 1;
# If you want, just print out a list of all of the variables and exit.
print $input->header if $debug;
}
}
-my %params_def = ( keywords => { default => undef, match => '^\s*([-+\@\w\/.:]+)\s*$' },
+my ( $format, $keyword, $case, $subword, $exact, $searchon,
+ @suites, @sections, @archs );
+
+my %params_def = ( keywords => { default => undef,
+ match => '^\s*([-+\@\w\/.:]+)\s*$',
+ var => \$keyword },
suite => { default => 'stable', match => '^(\w+)$',
alias => 'version', array => ',',
+ var => \@suites,
replace => { all => \@SUITES } },
- case => { default => 'insensitive', match => '^(\w+)$' },
- official => { default => 0, match => '^(\w+)$' },
- use_cache => { default => 1, match => '^(\w+)$' },
- subword => { default => 0, match => '^(\w+)$' },
- exact => { default => undef, match => '^(\w+)$' },
- searchon => { default => 'all', match => '^(\w+)$' },
+ case => { default => 'insensitive', match => '^(\w+)$',
+ var => \$case },
+# official => { default => 0, match => '^(\w+)$' },
+# use_cache => { default => 1, match => '^(\w+)$' },
+ subword => { default => 0, match => '^(\w+)$',
+ var => \$subword },
+ exact => { default => undef, match => '^(\w+)$',
+ var => \$exact },
+ searchon => { default => 'all', match => '^(\w+)$',
+ var => \$searchon },
section => { default => 'all', match => '^([\w-]+)$',
alias => 'release', array => ',',
+ var => \@sections,
replace => { all => \@SECTIONS } },
arch => { default => 'any', match => '^(\w+)$',
- array => ',', replace =>
+ array => ',', var => \@archs, replace =>
{ any => \@ARCHITECTURES } },
archive => { default => 'all', match => '^(\w+)$',
array => ',', replace =>
{ all => \@ARCHIVES } },
- format => { default => 'html', match => '^(\w+)$' },
+ format => { default => 'html', match => '^(\w+)$',
+ var => \$format },
);
-my %params = Packages::Search::parse_params( $input, \%params_def );
+my %opts;
+my %params = Packages::Search::parse_params( $input, \%params_def, \%opts );
-my $format = $params{values}{format}{final};
#XXX: Don't use alternative output formats yet
$format = 'html';
print "Error: keyword not valid or missing" if $format eq 'html';
exit 0;
}
-my $keyword = $params{values}{keywords}{final};
-my @suites = @{$params{values}{suite}{final}};
-my $official = $params{values}{official}{final};
-my $use_cache = $params{values}{use_cache}{final};
-my $case = $params{values}{case}{final};
+
my $case_bool = ( $case !~ /insensitive/ );
-my $subword = $params{values}{subword}{final};
-my $exact = $params{values}{exact}{final};
$exact = !$subword unless defined $exact;
-my $searchon = $params{values}{searchon}{final};
-my @sections = @{$params{values}{section}{final}};
-my @archs = @{$params{values}{arch}{final}};
-my $page = $params{values}{page}{final};
-my $results_per_page = $params{values}{number}{final};
+$opts{h_suites} = { map { $_ => 1 } @suites };
+$opts{h_sections} = { map { $_ => 1 } @sections };
+$opts{h_archs} = { map { $_ => 1 } @archs };
# for URL construction
my $suites_param = join ',', @{$params{values}{suite}{no_replace}};
my $st0 = new Benchmark;
my @results;
+my $too_many_hits;
if ($searchon eq 'sourcenames') {
$search_on_sources = 1;
}
-my %suites = map { $_ => 1 } @suites;
-my %sections = map { $_ => 1 } @sections;
-my %archs = map { $_ => 1 } @archs;
-
-print "DEBUG: suites=@suites, sections=@sections, archs=@archs<br>" if $debug > 2;
-
-if ($searchon eq 'names') {
+# read_entry( \%db, $key, \@results, \%opts )
+#
+# Looks up $key in the (tied) hash and appends every matching record to
+# @results as [ $key, @fields ].  The stored value is a list of records
+# separated by NUL bytes; each record is split on single whitespace
+# characters into at most 7 fields.  A record is kept only when
+#   field 0 (suite)        is a key of $opts->{h_suites},
+#   field 1 (architecture) is a key of $opts->{h_archs} or equals 'all',
+#   field 2 (section)      is a key of $opts->{h_sections}.
+# Emits DEBUG lines when the file-level $debug is greater than 2.
+sub read_entry {
+    my ($hash, $key, $results, $opts) = @_;
+
+    my $packed = $hash->{$key} || '';
+    for my $record (split /\000/, $packed) {
+        my @fields = split /\s/, $record, 7;
+        print "DEBUG: Considering entry ".join( ':', @fields)."<br>" if $debug > 2;
+
+        # Filter in the same order as before: suite, then arch, then section.
+        my ($suite, $arch, $section) = @fields[0 .. 2];
+        next unless $opts->{h_suites}{$suite};
+        next unless $opts->{h_archs}{$arch} || $arch eq 'all';
+        next unless $opts->{h_sections}{$section};
+
+        print "DEBUG: Using entry ".join( ':', @fields)."<br>" if $debug > 2;
+        push @$results, [ $key, @fields ];
+    }
+}
+# read_src_entry( \%db, $key, \@results, \%opts )
+#
+# Source-package counterpart of read_entry.  The value stored under $key
+# is a list of NUL-separated records, each split on single whitespace
+# characters into at most 5 fields.  A record is appended to @results as
+# [ $key, @fields ] when field 0 (suite) is a key of $opts->{h_suites}
+# and field 1 (section) is a key of $opts->{h_sections}; there is no
+# architecture filter.  Emits DEBUG lines when $debug is greater than 2.
+sub read_src_entry {
+    my ($hash, $key, $results, $opts) = @_;
+
+    my $packed = $hash->{$key} || '';
+    for my $record (split /\000/, $packed) {
+        my @fields = split /\s/, $record, 5;
+        print "DEBUG: Considering entry ".join( ':', @fields)."<br>" if $debug > 2;
+
+        my ($suite, $section) = @fields[0, 1];
+        next unless $opts->{h_suites}{$suite};
+        next unless $opts->{h_sections}{$section};
+
+        print "DEBUG: Using entry ".join( ':', @fields)."<br>" if $debug > 2;
+        push @$results, [ $key, @fields ];
+    }
+}
+# do_names_search( $keyword, $file, $postfix_file, \&reader, \%opts )
+#
+# Searches package names and returns a reference to the list of result
+# rows produced by the reader callback (read_entry or read_src_entry).
+#
+#   $keyword       search term; lower-cased unless $opts->{case_bool} is set
+#   $file          name of the BTREE DB (under $DBDIR) handed to the reader
+#   $postfix_file  name of the BTREE DB (under $DBDIR) used for substring
+#                  matches
+#   $read_entry    code ref called as ( \%db, $name, \@results, $opts )
+#   $opts          option hash; reads {case_bool} and {exact} here
+#
+# With $opts->{exact} only $keyword itself is looked up.  Otherwise the
+# postfix DB is walked from the first key >= $keyword for as long as the
+# key contains $keyword.  Each value is either a NUL-separated list of
+# prefixes ('^' standing for the empty prefix) that are prepended to the
+# key to form candidate names, or "\001<count>", which is added to the
+# file-level $too_many_hits.  Once $too_many_hits is set or 100 candidates
+# were collected, the candidate list is replaced by $keyword alone.
+#
+# Side effects: updates the file-level $too_many_hits; dies if a DB
+# cannot be tied.
+sub do_names_search {
+    my ($keyword, $file, $postfix_file, $read_entry, $opts) = @_;
+    my @results;
- $keyword = lc $keyword unless $case_bool;
+    $keyword = lc $keyword unless $opts->{case_bool};
- my %packages;
- tie %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE
- or die "couldn't tie DB $DBDIR/packages_small.db: $!";
+    tie my %packages, 'DB_File', "$DBDIR/$file", O_RDONLY, 0666, $DB_BTREE
+        or die "couldn't tie DB $DBDIR/$file: $!";
- my $result = $packages{$keyword};
- foreach (split /\000/, $result) {
- my @data = split ( /\s/, $_, 6 );
- #FIXME, should be done on db generation
- if ($data[2] =~ m,/,) {
- $data[2] =~ s,/.*$,,;
+    if ($opts->{exact}) {
+        $read_entry->( \%packages, $keyword, \@results, $opts );
+    } else {
+        my ($key, $prefixes) = ($keyword, '');
+        my %pkgs;
+        my $p_obj = tie my %pref, 'DB_File', "$DBDIR/$postfix_file", O_RDONLY, 0666, $DB_BTREE
+            or die "couldn't tie postfix db $DBDIR/$postfix_file: $!";
+        # seq() returns 0 on success.  Only walk the cursor when the initial
+        # positioning found a key; otherwise $key/$prefixes were never filled
+        # in and a following R_NEXT would not continue from $keyword.
+        if ($p_obj->seq( $key, $prefixes, R_CURSOR ) == 0) {
+            while (index($key, $keyword) >= 0) {
+                if ($prefixes =~ /^\001(\d+)/) {
+                    # Marker record: only a hit count is stored for this key.
+                    $too_many_hits += $1;
+                } else {
+                    foreach my $prefix (split /\000/, $prefixes) {
+                        $prefix = '' if $prefix eq '^';
+                        print "DEBUG: add word $prefix$key<br>" if $debug > 2;
+                        $pkgs{$prefix.$key}++;
+                    }
+                }
+                last if $p_obj->seq( $key, $prefixes, R_NEXT ) != 0;
+                last if $too_many_hits or keys %pkgs >= 100;
+            }
+        }
+
+        my $no_results = keys %pkgs;
+        if ($too_many_hits || ($no_results >= 100)) {
+            # Too broad: count what was found and fall back to the exact name.
+            $too_many_hits += $no_results;
+            %pkgs = ( $keyword => 1 );
+        }
+        foreach my $pkg (sort keys %pkgs) {
+            $read_entry->( \%packages, $pkg, \@results, $opts );
+        }
+    }
+    return \@results;
+}
+# do_fulltext_search( $keyword, $file, $mapping, $lookup, \&reader, \%opts )
+#
+# Scans the text file $DBDIR/$file line by line for $keyword and returns
+# a reference to the list of result rows for the packages those lines
+# belong to.
+#
+#   $keyword     literal search term (regex metacharacters are quoted)
+#   $file        text file to scan, one candidate per line
+#   $mapping     BTREE DB mapping a line number to NUL-separated records;
+#                each record is split on whitespace into 3 fields, field 0
+#                being the package name and field 2 the architecture
+#   $lookup      BTREE DB handed to the reader callback
+#   $read_entry  code ref called as ( \%db, $name, \@results, $opts )
+#   $opts        option hash; reads {case_bool}, {exact} and {h_archs}
+#
+# Matching is case-sensitive when $opts->{case_bool} is true; in exact
+# mode the keyword must be delimited by word boundaries.
+# Dies if the text file cannot be opened or a DB cannot be tied.
+sub do_fulltext_search {
+    my ($keyword, $file, $mapping, $lookup, $read_entry, $opts) = @_;
+    my @results;
+
+    my @lines;
+    my $regex;
+    if ($opts->{case_bool}) {
+        if ($opts->{exact}) {
+            $regex = qr/\b\Q$keyword\E\b/;
         } else {
- $data[2] = 'main';
+            $regex = qr/\Q$keyword\E/;
         }
- print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
- if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
- && $sections{$data[2]}) {
- print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
- push @results, [ $keyword, @data ];
+    } else {
+        # NOTE(review): this branch reads the file-level $exact while the
+        # case-sensitive branch reads $opts->{exact}; kept as is -- confirm
+        # which of the two is intended.
+        if ($exact) {
+            $regex = qr/\b\Q$keyword\E\b/i;
+        } else {
+            $regex = qr/\Q$keyword\E/i;
+        }
+    }
+
+    # Pass 1: remember the line numbers ($.) of all matching lines.
+    open my $desc_fh, '<', "$DBDIR/$file"
+        or die "couldn't open $DBDIR/$file: $!";
+    while (<$desc_fh>) {
+        $_ =~ $regex or next;
+        print "DEBUG: Matched line $.<br>" if $debug > 2;
+        push @lines, $.;
+    }
+    close $desc_fh;
+
+    tie my %packages, 'DB_File', "$DBDIR/$lookup", O_RDONLY, 0666, $DB_BTREE
+        or die "couldn't tie DB $DBDIR/$lookup: $!";
+    tie my %did2pkg, 'DB_File', "$DBDIR/$mapping", O_RDONLY, 0666, $DB_BTREE
+        or die "couldn't tie DB $DBDIR/$mapping: $!";
+
+    # Pass 2: map line numbers to package names, keeping only records whose
+    # architecture was requested; the hash removes duplicate names.
+    my %tmp_results;
+    foreach my $l (@lines) {
+        # A line without a mapping entry contributes nothing.
+        my $result = $did2pkg{$l} || '';
+        foreach (split /\000/o, $result) {
+            my @data = split /\s/, $_, 3;
+            next unless $opts->{h_archs}{$data[2]};
+            $tmp_results{$data[0]}++;
         }
     }
+    foreach my $pkg (keys %tmp_results) {
+        $read_entry->( \%packages, $pkg, \@results, $opts );
+    }
+    return \@results;
+}
+
+# find_binaries( $pkg, $suite )
+#
+# Returns a reference to the list (in no particular order) of distinct
+# second-field values recorded for $pkg in $DBDIR/sources_packages.db
+# whose first field equals $suite.  The stored value is a list of
+# NUL-separated records, each split on single whitespace characters into
+# at most 4 fields.  Dies if the DB cannot be tied.
+sub find_binaries {
+    my ($pkg, $suite) = @_;
+
+    my $db_path = "$DBDIR/sources_packages.db";
+    tie my %src2bin, 'DB_File', $db_path, O_RDONLY, 0666, $DB_BTREE
+        or die "couldn't open $db_path: $!";
+
+    my %seen;
+    for my $record (split /\000/, $src2bin{$pkg} || '') {
+        my ($rec_suite, $binary) = split /\s/, $record, 4;
+        $seen{$binary}++ if $rec_suite eq $suite;
+    }
+
+    return [ keys %seen ];
+}
+
+# Run the search selected by $searchon and collect the rows in @results.
+if ($searchon eq 'names') {
+ # Name search against packages_small.db, rows filtered by read_entry.
+ push @results, @{ do_names_search( $keyword, 'packages_small.db',
+ 'package_postfixes.db',
+ \&read_entry, \%opts ) };
+} elsif ($searchon eq 'sourcenames') {
+ # Name search against sources_small.db, rows filtered by read_src_entry.
+ push @results, @{ do_names_search( $keyword, 'sources_small.db',
+ 'source_postfixes.db',
+ \&read_src_entry, \%opts ) };
+} else {
+ # Any other value: name search first, then a full-text scan of
+ # descriptions.txt; both result lists are appended to @results.
+ push @results, @{ do_names_search( $keyword, 'packages_small.db',
+ 'package_postfixes.db',
+ \&read_entry, \%opts ) };
+ push @results, @{ do_fulltext_search( $keyword, 'descriptions.txt',
+ 'descriptions_packages.db',
+ 'packages_small.db',
+ \&read_entry, \%opts ) };
}
my $st1 = new Benchmark;
}
}
+if ($too_many_hits) {
+ print "<p><strong>Your search was too wide so we will only display exact matches. At least <em>$too_many_hits</em> results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.</strong></p>";
+}
+
if (!@results) {
if ($format eq 'html') {
my $keyword_esc = uri_escape( $keyword );
unless ($search_on_sources) {
foreach (@results) {
- my ($pkg_t, $suite, $arch, $section, $priority, $version, $desc) = @$_;
+ my ($pkg_t, $suite, $arch, $section, $subsection,
+ $priority, $version, $desc) = @$_;
my ($package) = $pkg_t =~ m/^(.+)/; # untaint
$pkgs{$package}{$suite}{$version}{$arch} = 1;
- $sect{$package}{$suite}{$version} = 'subsection';
+ $sect{$package}{$suite}{$version} = $subsection;
$part{$package}{$suite}{$version} = $section unless $section eq 'main';
$desc{$package}{$suite}{$version} = $desc;
-
}
if ($format eq 'html') {
}
print "</ul>\n";
}
- } elsif ($format eq 'xml') {
- require RDF::Simple::Serialiser;
- my $rdf = new RDF::Simple::Serialiser;
- $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
- my @triples;
- foreach my $pkg (sort keys %pkgs) {
- foreach my $ver (@DISTS) {
- if (exists $pkgs{$pkg}{$ver}) {
- my @versions = version_sort keys %{$pkgs{$pkg}{$ver}};
- foreach my $version (@versions) {
- my $id = "$ROOT/$ver/$sect{$pkg}{$ver}{$version}/$pkg/$version";
- push @triples, [ $id, 'debpkg:package', $pkg ];
- push @triples, [ $id, 'debpkg:version', $version ];
- push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{$version}, ];
- push @triples, [ $id, 'debpkg:suite', $ver ];
- push @triples, [ $id, 'debpkg:shortdesc', $desc{$pkg}{$ver}{$version} ];
- push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{$version} || 'main' ];
- foreach my $arch (sort keys %{$pkgs{$pkg}{$ver}{$version}}) {
- push @triples, [ $id, 'debpkg:architecture', $arch ];
- }
- }
- }
- }
- }
-
- print $rdf->serialise(@triples);
}
} else {
foreach (@results) {
- my ($package, $suite, $section, $version, $binaries);
+ my ($package, $suite, $section, $subsection, $priority,
+ $version) = @$_;
$pkgs{$package}{$suite} = $version;
- $sect{$package}{$suite}{source} = 'subsection';
+ $sect{$package}{$suite}{source} = $subsection;
$part{$package}{$suite}{source} = $section unless $section eq 'main';
- $binaries{$package}{$suite} = [ sort split( /\s*,\s*/, $binaries ) ];
-
+ $binaries{$package}{$suite} = find_binaries( $package, $suite );
}
if ($format eq 'html') {
print "<br>Binary packages: ";
my @bp_links;
foreach my $bp (@{$binaries{$pkg}{$ver}}) {
- my $sect = find_section($bp, $ver, $part{$pkg}{$ver}{source}||'main') || '';
- $sect =~ s,^(non-free|contrib)/,,;
- $sect =~ s,^non-US.*$,non-US,,;
- my $bp_link;
- if ($sect) {
- $bp_link = sprintf "<a href=\"$ROOT/%s/%s/%s\">%s</a>", $ver, $sect, uri_escape( $bp ), $bp;
- } else {
- $bp_link = $bp;
- }
+ my $bp_link = sprintf( "<a href=\"$ROOT/%s/%s\">%s</a>",
+ $ver, uri_escape( $bp ), $bp );
push @bp_links, $bp_link;
}
print join( ", ", @bp_links );
}
print "</ul>\n";
}
- } elsif ($format eq 'xml') {
- require RDF::Simple::Serialiser;
- my $rdf = new RDF::Simple::Serialiser;
- $rdf->addns( debpkg => 'http://packages.debian.org/xml/01-debian-packages-rdf' );
- my @triples;
- foreach my $pkg (sort keys %pkgs) {
- foreach my $ver (@SUITES) {
- if (exists $pkgs{$pkg}{$ver}) {
- my $id = "$ROOT/$ver/source/$pkg";
-
- push @triples, [ $id, 'debpkg:package', $pkg ];
- push @triples, [ $id, 'debpkg:type', 'source' ];
- push @triples, [ $id, 'debpkg:section', $sect{$pkg}{$ver}{source} ];
- push @triples, [ $id, 'debpkg:version', $pkgs{$pkg}{$ver} ];
- push @triples, [ $id, 'debpkg:part', $part{$pkg}{$ver}{source} || 'main' ];
-
- foreach my $bp (@{$binaries{$pkg}{$ver}}) {
- push @triples, [ $id, 'debpkg:binary', $bp ];
- }
- }
- }
- }
- print $rdf->serialise(@triples);
}
}
my $no_results = shift;
my $index_line;
- if ($no_results > $results_per_page) {
+ if ($no_results > $opts{number}) {
- $index_line = prevlink($input,\%params)." | ".indexline( $input, \%params, $no_results)." | ".nextlink($input,\%params, $no_results);
+ $index_line = prevlink($input,\%params)." | ".
+ indexline( $input, \%params, $no_results)." | ".
+ nextlink($input,\%params, $no_results);
print "<p style=\"text-align:center\">$index_line</p>";
}
my $no_results = shift;
my ($start, $end);
- if ($results_per_page =~ /^all$/i) {
+ if ($opts{number} =~ /^all$/i) {
$start = 1;
$end = $no_results;
- $results_per_page = $no_results;
+ $opts{number} = $no_results;
} else {
$start = Packages::Search::start( \%params );
$end = Packages::Search::end( \%params );
print "<p>Results per page: ";
my @resperpagelinks;
for (50, 100, 200) {
- if ($results_per_page == $_) {
+ if ($opts{number} == $_) {
push @resperpagelinks, $_;
} else {
push @resperpagelinks, resperpagelink($input,\%params,$_);
</div>
END
+my $pete = new Benchmark;
+my $petd = timediff($pete, $pet0);
+print "Total page evaluation took ".timestr($petd)."<br>"
+ if $debug_allowed;
print $input->end_html;
}
+
+# vim: ts=8 sw=4