From 1fceab61ae04b085395167ede490a9d0eab02b11 Mon Sep 17 00:00:00 2001 From: Jeroen van Wolffelaar Date: Mon, 20 Feb 2006 15:26:24 +0000 Subject: [PATCH] Fix contents search my moving it to lib, like the rest. Still a bit rough. --- cgi-bin/dispatcher.pl | 6 +- cgi-bin/search_contents.pl | 78 --------------- lib/Packages/DoSearchContents.pm | 159 +++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 79 deletions(-) delete mode 100755 cgi-bin/search_contents.pl create mode 100644 lib/Packages/DoSearchContents.pm diff --git a/cgi-bin/dispatcher.pl b/cgi-bin/dispatcher.pl index 0de4f1c..ea7c3e2 100755 --- a/cgi-bin/dispatcher.pl +++ b/cgi-bin/dispatcher.pl @@ -28,6 +28,7 @@ use Packages::HTML (); use Packages::Sections; use Packages::DoSearch; +use Packages::DoSearchContents; use Packages::DoShow; use Packages::DoDownload; use Packages::DoFilelist; @@ -57,7 +58,7 @@ $Packages::CGI::debug = $debug; my $what_to_do = 'show'; my $source = 0; -if (my $path = $input->path_info()) { +if (my $path = $input->path_info() || $input->param('PATH_INFO')) { my @components = grep { $_ } map { lc $_ } split /\/+/, $path; debug( "components[0]=$components[0]", 2 ); @@ -182,6 +183,9 @@ if ((($opts{searchon} eq 'names') && $opts{source}) || } else { $opts{searchon_form} = $opts{searchon}; } +if ($opts{searchon} eq 'contents' or $opts{searchon} eq 'filenames') { + $what_to_do = 'search_contents'; +} my $pet1 = new Benchmark; my $petd = timediff($pet1, $pet0); diff --git a/cgi-bin/search_contents.pl b/cgi-bin/search_contents.pl deleted file mode 100755 index 66e216d..0000000 --- a/cgi-bin/search_contents.pl +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/perl -wT -# $Id$ -# search_contents.pl -- CGI interface to the Contents files on packages.debian.org -# -# Copyright (C) 2006 Jeroen van Wolffelaar -# -# use is allowed under the terms of the GNU Public License (GPL) -# see http://www.fsf.org/copyleft/gpl.html for a copy of the license - -sub contents() { - my $nres = 0; - - my ($cgi) = @_; - - print "Extremely blunt ends-with search results:
";
-# only thing implemented yet: ends-with search
-    my $kw = lc $cgi->param("keywords");
-    # full filename search is tricky
-    my $ffn = $cgi->param("fullfilename");
-    $ffn = $ffn ? 1 : 0;
-
-
-my $suite = 'stable'; #fixme
-
-    my $reverses = tie my %reverses, 'DB_File', "$DBDIR/contents/reverse_$suite.db",
-	O_RDONLY, 0666, $DB_BTREE
-	or die "Failed opening reverse DB: $!";
-
-    if ($ffn) {
-	open FILENAMES, "$DBDIR/contents/filenames_$suite.txt"
-	    or die "Failed opening filename table";
-	while () {
-	    next if index($_, $kw)<0;
-	    chomp;
-	    last unless &dosearch(reverse($_)."/", \$nres, $reverses);
-	}
-	close FILENAMES;
-    } else {
-
-	$kw = reverse $kw;
-	
-	# exact filename searching follows trivially:
-	my $exact = $cgi->param("exact");
-	$kw = "$kw/" if $exact;
-
-	print "ERROR: Exact and fullfilenamesearch don't go along" if $ffn and $exact;
-
-	&dosearch($kw, \$nres, $reverses);
-    }
-    print "
$nres results displayed"; - $reverses = undef; - untie %reverses; - -} - -sub dosearch -{ - my ($kw, $nres, $reverses) = @_; - - my ($key, $rest) = ($kw, ""); - for (my $status = $reverses->seq($key, $value, R_CURSOR); - $status == 0; - $status = $reverses->seq( $key, $value, R_NEXT)) { - - # FIXME: what's the most efficient "is prefix of" thingy? We only want to know - # whether $kw is or is not a prefix of $key - last unless index($key, $kw) == 0; - - @hits = split /\0/o, $value; - print reverse($key)." is found in @hits\n"; - last if ($$nres)++ > 100; - } - - return $$nres<100; -} - -1; -# vim: ts=8 sw=4 diff --git a/lib/Packages/DoSearchContents.pm b/lib/Packages/DoSearchContents.pm new file mode 100644 index 0000000..b1290f2 --- /dev/null +++ b/lib/Packages/DoSearchContents.pm @@ -0,0 +1,159 @@ +package Packages::DoSearchContents; + +use strict; +use warnings; + +use Benchmark; +use DB_File; +use URI::Escape; +use HTML::Entities; +use Exporter; +our @ISA = qw( Exporter ); +our @EXPORT = qw( do_search_contents ); + +use Deb::Versions; +use Packages::Search qw( :all ); +use Packages::CGI; +use Packages::DB; +use Packages::Config qw( $DBDIR $SEARCH_URL $SEARCH_CGI $SEARCH_PAGE + @SUITES @ARCHIVES $ROOT ); + +sub do_search_contents { + my ($params, $opts, $html_header, $menu, $page_content) = @_; + + if ($params->{errors}{keywords}) { + fatal_error( "keyword not valid or missing" ); + } elsif (length($opts->{keywords}) < 2) { + fatal_error( "keyword too short (keywords need to have at least two characters)" ); + } + + $$menu = ""; + + my $keyword = $opts->{keywords}; + my $searchon = $opts->{searchon}; + my $exact = $opts->{exact}; + + # for URL construction + my $keyword_esc = uri_escape( $keyword ); + my $suites_param = join ',', @{$params->{values}{suite}{no_replace}}; + my $sections_param = join ',', @{$params->{values}{section}{no_replace}}; + my $archs_param = join ',', @{$params->{values}{arch}{no_replace}}; + + # for output + my $keyword_enc = encode_entities $keyword || ''; + my $searchon_enc = encode_entities $searchon; + my $suites_enc = encode_entities( join( ', ', @{$params->{values}{suite}{no_replace}} ) ); + my $sections_enc = encode_entities( join( ', ', @{$params->{values}{section}{no_replace}} ) ); + my $archs_enc = encode_entities( join( ', ', @{$params->{values}{arch}{no_replace}} ) ); + + my $st0 = new Benchmark; + my (@results, @non_results); + + unless (@Packages::CGI::fatal_errors) { + + my $nres = 0; + + my $kw = lc $keyword; + # full filename search is tricky + my $ffn = $searchon eq 'filenames'; + + my $suite = 'stable'; #fixme + + my $reverses = tie my %reverses, 'DB_File', "$DBDIR/contents/reverse_$suite.db", + O_RDONLY, 0666, $DB_BTREE + or die "Failed opening reverse DB: $!"; + + if ($ffn) { + open FILENAMES, '-|', 'fgrep', '--', "$DBDIR/contents/filenames_$suite.txt" + or die "Failed opening filename table: $!"; + while () { + chomp; + last unless &searchfile(\@results, reverse($_)."/", \$nres, $reverses); + } + close FILENAMES; + } else { + + $kw = reverse $kw; + + # exact filename searching follows trivially: + $kw = "$kw/" if $exact; + + print "ERROR: Exact and fullfilenamesearch don't go along" if $ffn and $exact; + + &searchfile(\@results, $kw, \$nres, $reverses); + } + $reverses = undef; + untie %reverses; + + + my $st1 = new Benchmark; + my $std = timediff($st1, $st0); + debug( "Search took ".timestr($std) ); + } + + my $suite_wording = $suites_enc eq "all" ? "all suites" + : "suite(s) $suites_enc"; + my $section_wording = $sections_enc eq 'all' ? "all sections" + : "section(s) $sections_enc"; + my $arch_wording = $archs_enc eq 'any' ? "all architectures" + : "architecture(s) $archs_enc"; + my $wording = $opts->{exact} ? "exact filenames" : "filenames that contain"; + $wording = "paths that end with" if $searchon eq "contents"; + msg( "You have searched for ${wording} $keyword_enc in $suite_wording, $section_wording, and $arch_wording." ); + + if ($Packages::Search::too_many_hits) { + error( "Your search was too wide so we will only display exact matches. At least $Packages::Search::too_many_hits results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords." ); + } + + $$page_content = ''; + if (!@Packages::CGI::fatal_errors && !@results) { + $$page_content .= "No results"; + } + + %$html_header = ( title => 'Package Contents Search Results' , + lang => 'en', + title_tag => 'Debian Package Contents Search Results', + print_title => 1, + print_search_field => 'packages', + search_field_values => { + keywords => $keyword_enc, + searchon => 'contents', + arch => $archs_enc, + suite => $suites_enc, + section => $sections_enc, + exact => $opts->{exact}, + debug => $opts->{debug}, + }, + ); + + if (@results) { + $$page_content .= scalar @results . " results displayed:
"; + foreach (@results) { + $$page_content .= "$_
\n"; + } + } +} # sub do_search_contents + +sub searchfile +{ + my ($results, $kw, $nres, $reverses) = @_; + + my ($key, $value) = ($kw, ""); + for (my $status = $reverses->seq($key, $value, R_CURSOR); + $status == 0; + $status = $reverses->seq( $key, $value, R_NEXT)) { + + # FIXME: what's the most efficient "is prefix of" thingy? We only want to know + # whether $kw is or is not a prefix of $key + last unless index($key, $kw) == 0; + + my @hits = split /\0/o, $value; + push @$results, reverse($key)." is found in @hits"; + last if ($$nres)++ > 100; + } + + return $$nres<100; +} + + +1; -- 2.39.2