X-Git-Url: https://git.deb.at/?p=deb%2Fpackages.git;a=blobdiff_plain;f=cgi-bin%2Fsearch_packages.pl;h=f6df10c95635a882bc87a825eaf698266ae0777c;hp=4e2225ca25114b8f650ca68e666143078b79b84b;hb=7837241d743061df9f58680a9b786e48d3365a48;hpb=7ca948e9df320aa1785beda7bfcf927cf676af43

diff --git a/cgi-bin/search_packages.pl b/cgi-bin/search_packages.pl
index 4e2225c..f6df10c 100755
--- a/cgi-bin/search_packages.pl
+++ b/cgi-bin/search_packages.pl
@@ -177,6 +177,7 @@ my $search_on_sources = 0;
 
 my $st0 = new Benchmark;
 my @results;
+my $too_much_hits;
 if ($searchon eq 'sourcenames') {
     $search_on_sources = 1;
 }
@@ -187,6 +188,34 @@ my %archs = map { $_ => 1 } @archs;
 
 print "DEBUG: suites=@suites, sections=@sections, archs=@archs<br>" if $debug > 2;
 
+# Append to @$results one [ $key, fields... ] row for every entry stored under
+# $key in %$hash whose suite, architecture (or 'all') and section are wanted.
+sub read_entry {
+    my ($hash, $key, $results) = @_;
+    defined( my $result = $hash->{$key} ) or return;	# no such key: nothing to add
+    foreach my $entry (split /\000/, $result) {
+	my @data = split ( /\s/, $entry, 7 );
+	print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
+	next unless $suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all') && $sections{$data[2]};
+	print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
+	push @$results, [ $key, @data ];
+    }
+}
+# Append to @$results one [ $key, fields... ] row for every source entry stored
+# under $key in %$hash whose suite and section are both wanted.
+sub read_src_entry {
+    my ($hash, $key, $results) = @_;
+    defined( my $result = $hash->{$key} ) or return;	# no such key: nothing to add
+    foreach my $entry (split /\000/, $result) {
+	my @data = split ( /\s/, $entry, 5 );
+	print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
+	next unless $suites{$data[0]} && $sections{$data[1]};
+	print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
+	push @$results, [ $key, @data ];
+    }
+}
+
+
 if ($searchon eq 'names') {
 
     $keyword = lc $keyword unless $case_bool;
@@ -195,30 +224,36 @@ if ($searchon eq 'names') {
 	or die "couldn't tie DB $DBDIR/packages_small.db: $!";
     
     if ($exact) {
-	my $result = $packages{$keyword};
-	foreach (split /\000/, $result) {
-	    my @data = split ( /\s/, $_, 7 );
-	    print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
-	    if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
-		&& $sections{$data[2]}) {
-		print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
-		push @results, [ $keyword, @data ];
-	    }
-	}
+	read_entry( \%packages, $keyword, \@results );
     } else {
-	while (my ($pkg, $result) = each %packages) {
-            #what's faster? I can't really see a difference
-	    (index($pkg, $keyword) >= 0) or next;
-	    #$pkg =~ /\Q$keyword\E/ or next;
-	    foreach (split /\000/, $packages{$pkg}) {
-		my @data = split ( /\s/, $_, 7 );
-		print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
-		if ($suites{$data[0]} && ($archs{$data[1]} || $data[1] eq 'all')
-		    && $sections{$data[2]}) {
-		    print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
-		    push @results, [ $pkg , @data ];
+	# Postfix db: each value lists \000-separated prefixes completing its key to package names.
+	my ($key, $prefixes) = ($keyword, '');
+	my %pkgs;
+	my $p_obj = tie my %pref, 'DB_File', "$DBDIR/package_postfixes.db", O_RDONLY, 0666, $DB_BTREE
+	    or die "couldn't tie postfix db $DBDIR/package_postfixes.db: $!";
+	# R_CURSOR positions on the first key >= $keyword, which need not contain it,
+	# so every key (including that first one) is checked before it is used.
+	my $status = $p_obj->seq( $key, $prefixes, R_CURSOR );
+	while ($status == 0 && index($key, $keyword) >= 0 && !$too_much_hits && (keys %pkgs < 100)) {
+	    if ($prefixes =~ /^\001(\d+)/o) {
+		$too_much_hits += $1;
+	    } else {
+		print "DEBUG: add word $key<br>" if $debug > 2;
+		$pkgs{$key}++;
+		foreach (split /\000/o, $prefixes) {
+		    print "DEBUG: add word $_$key<br>" if $debug > 2;
+		    $pkgs{$_.$key}++;
 		}
 	    }
+	    $status = $p_obj->seq( $key, $prefixes, R_NEXT );
+	}
+	my $no_results = keys %pkgs;
+	if ($too_much_hits || ($no_results >= 100)) {
+	    $too_much_hits += $no_results;
+	    %pkgs = ( $keyword => 1 );
+	}
+	foreach my $pkg (sort keys %pkgs) {
+	    read_entry( \%packages, $pkg, \@results );
 	}
     }
 } elsif ($searchon eq 'sourcenames') {
@@ -229,21 +264,13 @@ if ($searchon eq 'names') {
 	or die "couldn't tie DB $DBDIR/sources_small.db: $!";
     
     if ($exact) {
-	my $result = $packages{$keyword};
-	foreach (split /\000/, $result) {
-	    my @data = split ( /\s/, $_, 5 );
-	    print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
-	    if ($suites{$data[0]} && $sections{$data[1]}) {
-		print "DEBUG: Using entry ".join( ':', @data)."<br>" if $debug > 2;
-		push @results, [ $keyword, @data ];
-	    }
-	}
+	read_src_entry( \%packages, $keyword, \@results );
     } else {
 	while (my ($pkg, $result) = each %packages) {
             #what's faster? I can't really see a difference
 	    (index($pkg, $keyword) >= 0) or next;
 	    #$pkg =~ /\Q$keyword\E/ or next;
-	    foreach (split /\000/, $packages{$pkg}) {
+	    foreach (split /\000/, $result) {
 		my @data = split ( /\s/, $_, 5 );
 		print "DEBUG: Considering entry ".join( ':', @data)."<br>" if $debug > 2;
 		if ($suites{$data[0]} && $sections{$data[1]}) {
@@ -253,6 +280,49 @@ if ($searchon eq 'names') {
 	    }
 	}
     }
+} else {
+
+    # Full-text search over package descriptions.  The numbers of the matching
+    # lines of descriptions.txt are looked up in descriptions_packages.db; each
+    # value is a \000-separated list of records whose whitespace-separated
+    # fields 0 and 2 are used as package name and architecture.
+    my @lines;
+    my $quoted = quotemeta $keyword;
+    $quoted = "\\b$quoted\\b" if $exact;	# exact: match whole words only
+    my $regex = $case_bool ? qr/$quoted/ : qr/$quoted/i;
+
+    # A lexical handle cannot clash with a DESC handle used elsewhere.
+    open my $desc_fh, '<', "$DBDIR/descriptions.txt"
+	or die "couldn't open $DBDIR/descriptions.txt: $!";
+    while (my $line = <$desc_fh>) {
+	$line =~ $regex or next;
+	print "DEBUG: Matched line $.<br>" if $debug > 2;
+	push @lines, $.;
+    }
+    close $desc_fh;
+
+    # The objects returned by tie are not needed here, so they are not stored
+    # (two "my $obj" in one scope would also trigger a "masks earlier" warning).
+    tie my %packages, 'DB_File', "$DBDIR/packages_small.db", O_RDONLY, 0666, $DB_BTREE
+	or die "couldn't tie DB $DBDIR/packages_small.db: $!";
+    tie my %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db", O_RDONLY, 0666, $DB_BTREE
+	or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!";
+
+    # Collect the distinct package names behind the matching lines.
+    my %tmp_results;
+    foreach my $line_no (@lines) {
+	my $result = $did2pkg{$line_no};
+	next unless defined $result;	# matching line without a package mapping
+	foreach (split /\000/o, $result) {
+	    my @data = split /\s/, $_, 3;
+	    next unless $archs{$data[2]};
+	    $tmp_results{$data[0]}++;
+	}
+    }
+    # Fetch the full entries of every package whose description matched.
+    foreach my $pkg (keys %tmp_results) {
+	read_entry( \%packages, $pkg, \@results );
+    }
 }
 
 my $st1 = new Benchmark;
@@ -276,6 +346,10 @@ if ($format eq 'html') {
     }
 }
 
+# Explain that only exact matches are shown because the search was too wide.
+# NOTE(review): printed for every $format, not only 'html' -- confirm intended.
+print "<p><strong>Your search was too wide so we will only display exact matches. At least <em>$too_much_hits</em> results have been omitted and will not be displayed. Please consider using a longer keyword or more keywords.</strong></p>" if $too_much_hits;
+
 if (!@results) {
     if ($format eq 'html') {
 	my $keyword_esc = uri_escape( $keyword );