From: Jeroen van Wolffelaar Date: Fri, 10 Feb 2006 12:49:58 +0000 (+0000) Subject: Optimize and improve full description search by stripping away insignificant X-Git-Tag: switch-to-templates~145 X-Git-Url: https://git.deb.at/w?a=commitdiff_plain;h=5ac2b90beab477b44b863b2aa895e8fb3fedcd07;p=deb%2Fpackages.git Optimize and improve full description search by stripping away insignificant characters, and use 'index' instead of 'regex'. Dropped possibility to do caseful searches. --- diff --git a/bin/parse-packages b/bin/parse-packages index 38c83e9..e6a3082 100755 --- a/bin/parse-packages +++ b/bin/parse-packages @@ -184,8 +184,13 @@ tie %descriptions_db, "DB_File", "$DBDIR/descriptions.db.new", open DESCR, ">", "$DBDIR/descriptions.txt" or die "Error creating descriptions textfile"; for (my $i=1; $i<= $#descriptions; $i++) { my $plain_description = $descriptions[$i]; - $plain_description =~ s/\n .\n/ /og; - $plain_description =~ s/[\n \t]+/ /og; +# WARNING: This needs to correspond with what happens in +# Packages/Search.pm:do_fulltext_search + $plain_description =~ tr [A-Z] [a-z]; + # ensure one space on both ends + $plain_description = " $plain_description "; + $plain_description =~ s/[(),.-]+//og; + $plain_description =~ s#[^a-z0-9_/+]+# #og; print DESCR "$plain_description\n"; $descriptions_db{$i} = $descriptions[$i]; } diff --git a/lib/Packages/Search.pm b/lib/Packages/Search.pm index 64fa295..6ae7d97 100644 --- a/lib/Packages/Search.pm +++ b/lib/Packages/Search.pm @@ -442,26 +442,19 @@ sub do_fulltext_search { my ($keyword, $file, $did2pkg, $packages, $read_entry, $opts) = @_; my @results; +# NOTE: this needs to correspond with parse-packages! 
my @lines; - my $regex; - if ($opts->{case_bool}) { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/o; - } else { - $regex = qr/\Q$keyword\E/o; - } - } else { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/io; - } else { - $regex = qr/\Q$keyword\E/io; - } + $keyword =~ tr [A-Z] [a-z]; + if ($opts->{exact}) { + $keyword = " $keyword "; } + $keyword =~ s/[(),.-]+//og; + $keyword =~ s#[^a-z0-9_/+]+# #og; open DESC, '<', "$file" or die "couldn't open $file: $!"; while (<DESC>) { - $_ =~ $regex or next; + next if index $_, $keyword < 0; debug( "Matched line $.", 2); push @lines, $.; }