]> git.deb.at Git - deb/packages.git/commitdiff
Optimize and improve full description search by stripping away insignificant
authorJeroen van Wolffelaar <jeroen@wolffelaar.nl>
Fri, 10 Feb 2006 12:49:58 +0000 (12:49 +0000)
committerJeroen van Wolffelaar <jeroen@wolffelaar.nl>
Fri, 10 Feb 2006 12:49:58 +0000 (12:49 +0000)
characters, and use 'index' instead of 'regex'. Dropped possibility to do
caseful searches.

bin/parse-packages
lib/Packages/Search.pm

index 38c83e988fbfcc8e890d44a30bbe47b5030c5708..e6a30829cd5ad30bb5f853d4ff3c8e328d624aec 100755 (executable)
@@ -184,8 +184,13 @@ tie %descriptions_db, "DB_File", "$DBDIR/descriptions.db.new",
 open DESCR, ">", "$DBDIR/descriptions.txt" or die "Error creating descriptions textfile";
 for (my $i=1; $i<= $#descriptions; $i++) {
        my $plain_description = $descriptions[$i];
-       $plain_description =~ s/\n .\n/ /og;
-       $plain_description =~ s/[\n \t]+/ /og;
+# WARNING: the normalization below must stay in sync with the keyword
+# normalization done in Packages/Search.pm:do_fulltext_search
+       $plain_description =~ tr [A-Z] [a-z]; # lowercase ASCII letters only
+       # pad with one space on each end so that a space-delimited keyword can also match the first and last word
+       $plain_description = " $plain_description ";
+       $plain_description =~ s/[(),.-]+//og; # delete ( ) , . - outright, leaving no space behind
+       $plain_description =~ s#[^a-z0-9_/+]+# #og; # squeeze every remaining run of other characters (newlines included) into one space
        print DESCR "$plain_description\n";
        $descriptions_db{$i} = $descriptions[$i];
 }
index 64fa295d29498f8ef2071742478591c2855b0a88..6ae7d9722173de7bf89aa99ab37b7259d1c36b06 100644 (file)
@@ -442,26 +442,19 @@ sub do_fulltext_search {
     my ($keyword, $file, $did2pkg, $packages, $read_entry, $opts) = @_;
     my @results;
 
+# NOTE: this needs to correspond with parse-packages!
     my @lines;
-    my $regex;
-    if ($opts->{case_bool}) {
-       if ($opts->{exact}) {
-           $regex = qr/\b\Q$keyword\E\b/o;
-       } else {
-           $regex = qr/\Q$keyword\E/o;
-       }
-    } else {
-       if ($opts->{exact}) {
-           $regex = qr/\b\Q$keyword\E\b/io;
-       } else {
-           $regex = qr/\Q$keyword\E/io;
-       }
+    $keyword =~ tr [A-Z] [a-z];
+    if ($opts->{exact}) {
+       $keyword = " $keyword ";
     }
+    $keyword =~ s/[(),.-]+//og;
+    $keyword =~ s#[^a-z0-9_/+]+# #og;
 
     open DESC, '<', "$file"
        or die "couldn't open $file: $!";
     while (<DESC>) {
-       $_ =~ $regex or next;
+       next if index $_, $keyword < 0;
        debug( "Matched line $.", 2);
        push @lines, $.;
     }