From: Frank Lichtenheld Date: Sun, 17 Jun 2007 17:13:13 +0000 (+0200) Subject: Remove obsolete descriptions.txt previously used for fulltext search X-Git-Url: https://git.deb.at/w?a=commitdiff_plain;h=cdae3720732a12007e363f01c24de74c2620d969;p=deb%2Fpackages.git Remove obsolete descriptions.txt previously used for fulltext search I will definitely stick with xapian for now. --- diff --git a/BACKEND b/BACKEND index 445a45d..11f1409 100644 --- a/BACKEND +++ b/BACKEND @@ -41,11 +41,6 @@ Generated by means of Packages.gz files: | key: "packagename version arch" | value: a unique description id, did -| descriptions.txt: -| on each line: -| description with strange characters mangled for proper substring -| searching, linenumber being the did - | descriptions.db: | key: did | value: description, first line being short, the rest being long [no diff --git a/TODO b/TODO index d705607..0c45640 100644 --- a/TODO +++ b/TODO @@ -19,10 +19,6 @@ search_packages: matches" and abce: 90 matches" - fulltext search: - - Max 100 results - - Better exact=1 performance by indexing per word? - - drop case-sensitive from options, descriptions.txt all lowercase and without - punctuation, such that instead of =~ //, indexof can be used - in results, show full descriptions, so one sees what's being matched? 
- backend: diff --git a/bin/parse-packages b/bin/parse-packages index 79fb6d9..1536b81 100755 --- a/bin/parse-packages +++ b/bin/parse-packages @@ -246,7 +246,6 @@ my %descriptions_db; tie %descriptions_db, "DB_File", "$DBDIR/descriptions.db.new", O_RDWR|O_CREAT, 0666, $DB_BTREE or die "Error creating DB: $!"; -open DESCR, ">", "$DBDIR/descriptions.txt" or die "Error creating descriptions textfile"; print "Index $#descriptions descriptions\n"; for (my $i=1; $i<= $#descriptions; $i++) { my $plain_description = $descriptions[$i]; @@ -259,7 +258,6 @@ for (my $i=1; $i<= $#descriptions; $i++) { $plain_description = " $plain_description "; $plain_description =~ s/[(),.-]+//og; $plain_description =~ s#[^a-z0-9_/+]+# #og; - print DESCR "$plain_description\n"; #XAPIAN eval { @@ -283,7 +281,6 @@ for (my $i=1; $i<= $#descriptions; $i++) { $descriptions_db{$i} = $only_desc; } -close DESCR; untie %descriptions_db; $xapian_db->flush; undef $xapian_db; @@ -332,6 +329,5 @@ rename("$DBDIR/packages_descriptions.db.new", "$DBDIR/packages_descriptions.db"); rename("$DBDIR/descriptions_packages.db.new", "$DBDIR/descriptions_packages.db"); -rename("$DBDIR/descriptions.txt.new", "$DBDIR/descriptions.txt"); rename("$DBDIR/descriptions.db.new", "$DBDIR/descriptions.db"); rename("$DBDIR/package_postfixes.db.new", "$DBDIR/package_postfixes.db"); diff --git a/lib/Packages/DoSearch.pm b/lib/Packages/DoSearch.pm index a94dd7b..5b0b4bb 100644 --- a/lib/Packages/DoSearch.pm +++ b/lib/Packages/DoSearch.pm @@ -53,21 +53,14 @@ sub do_search { do_names_search( [ @keywords ], \%packages, $p_obj, \&read_entry_all, $opts, \@results, \@non_results ); -# my $fts0 = new Benchmark; -# do_fulltext_search( [ @keywords ], "$DBDIR/descriptions.txt", -# \%did2pkg, \%packages, -# \&read_entry_all, $opts, -# \@results, \@non_results ); my $fts1 = new Benchmark; do_xapian_search( [ @keywords ], "$DBDIR/xapian/", \%did2pkg, \%packages, \&read_entry_all, $opts, \@results, \@non_results ); my $fts2 = new Benchmark; -# 
my $fts_grep = timediff($fts1,$fts0); my $fts_xapian = timediff($fts2,$fts1); -# debug( "Fulltext search took ".timestr($fts_grep)." (grep)" ) if DEBUG; - debug( "Fulltext search took ".timestr($fts_xapian)." (Xapian)" ) + debug( "Fulltext search took ".timestr($fts_xapian) ) if DEBUG; } } diff --git a/lib/Packages/Search.pm b/lib/Packages/Search.pm index fdd3a3a..671e340 100644 --- a/lib/Packages/Search.pm +++ b/lib/Packages/Search.pm @@ -196,55 +196,6 @@ sub do_names_search { &$read_entry( $packages, $pkg, $results, $non_results, $opts ); } } -sub do_fulltext_search { - my ($keywords, $file, $did2pkg, $packages, $read_entry, $opts, - $results, $non_results) = @_; - -# NOTE: this needs to correspond with parse-packages! - my @tmp; - foreach my $keyword (@$keywords) { - $keyword =~ tr [A-Z] [a-z]; - if ($opts->{exact}) { - $keyword = " $keyword "; - } - $keyword =~ s/[(),.-]+//og; - $keyword =~ s;[^a-z0-9_/+]+; ;og; - push @tmp, $keyword; - } - my $first_keyword = shift @tmp; - @$keywords = @tmp; - - my $numres = 0; - my %tmp_results; - # fgrep is seriously faster than using perl - open DESC, '-|', 'fgrep', '-n', '--', $first_keyword, $file - or die "couldn't open $file: $!"; - LINE: - while () { - foreach my $k (@$keywords) { - next LINE unless /\Q$k\E/; - } - /^(\d+)/; - my $nr = $1; - debug( "Matched line $_", 2) if DEBUG; - my $result = $did2pkg->{$nr}; - foreach (split /\000/o, $result) { - my @data = split /\s/, $_, 3; -# debug ("Considering $data[0], arch = $data[2]", 3) if DEBUG; -# next unless $data[2] eq 'all' || $opts->{h_archs}{$data[2]}; -# debug ("Ok", 3) if DEBUG; - $numres++ unless $tmp_results{$data[0]}++; - } - last if $numres > 100; - } - close DESC; - $too_many_hits++ if $numres > 100; - - my @results; - foreach my $pkg (keys %tmp_results) { - &$read_entry( $packages, $pkg, $results, $non_results, $opts ); - } - } sub do_xapian_search { my ($keywords, $db, $did2pkg, $packages, $read_entry, $opts,