From: Frank Lichtenheld Date: Fri, 1 Feb 2008 00:45:41 +0000 (+0100) Subject: Improve result sampling in case we have too generic keywords X-Git-Url: https://git.deb.at/?a=commitdiff_plain;h=b84419b2f87cf3c43b83fca71486be4fae81375e;p=deb%2Fpackages.git Improve result sampling in case we have too generic keywords Instead of either displaying the exact match if we have only one keyword and nothing otherwise, let's try to give people a sample of results, on the chance that it might still be useful. Mostly just display the first 100 package names found (i.e. from suffix entries that had no "too many prefixes" value). Allow suffix entries with a "too many prefixes" value to specify whether the empty prefix was contained in the set (that effectively avoids a regression in the case "single keyword, exact match exists"). If in doubt, prefer package names that begin with the first keyword and only abort if we have more than 100 of those (The worst case behaviour of this should be very limited due to the "too many prefixes" constraints). --- diff --git a/bin/parse-packages b/bin/parse-packages index 2e713c6..97edffc 100755 --- a/bin/parse-packages +++ b/bin/parse-packages @@ -336,11 +336,10 @@ tie %package_postfixes_db, "DB_File", "$DBDIR/package_postfixes.db.new", or die "Error creating DB: $!"; while (my ($k, $v) = each(%package_postfixes)) { $v =~ s/.$//s; - my $nr = $v; - $nr =~ s/[^\000]//g; - $nr = length($nr) + 1; # < number of hits + my $nr = ($v =~ tr/\000/\000/) + 1; if ($nr > $MAX_PACKAGE_POSTFIXES) { - $v = "\001" . $nr; + $v = ($v =~ /\^/) ? "^\001" . $nr + : "\001" . 
$nr; } $package_postfixes_db{$k} = $v; } diff --git a/lib/Packages/Search.pm b/lib/Packages/Search.pm index 04270f2..c33f142 100644 --- a/lib/Packages/Search.pm +++ b/lib/Packages/Search.pm @@ -164,14 +164,22 @@ sub do_names_search { my $first_keyword = lc shift @$keywords; @$keywords = map { lc $_ } @$keywords; - + my ($key, $prefixes) = ($first_keyword, ''); - my %pkgs; + my (%pkgs, %pkgs_min); $postfixes->seq( $key, $prefixes, R_CURSOR ); while (index($key, $first_keyword) >= 0) { - if ($prefixes =~ /^\001(\d+)/o) { - debug( "$key has too many hits", 2 ) if DEBUG; - $too_many_hits += $1; + if ($prefixes =~ /^(\^)?\001(\d+)/o) { + debug("$key has too many hits", 2 ) if DEBUG; + $too_many_hits += $2; + if ($1) { # use the empty prefix + foreach my $k (@$keywords) { + next unless $key =~ /\Q$k\E/; + } + debug("add key $key", 2) if DEBUG; + $pkgs{$key}++; + $pkgs_min{$key}++; + } } else { PREFIX: foreach (split /\000/o, $prefixes) { @@ -180,18 +188,21 @@ sub do_names_search { foreach my $k (@$keywords) { next PREFIX unless $word =~ /\Q$k\E/; } - debug( "add word $word", 2) if DEBUG; + debug("add word $word", 2) if DEBUG; $pkgs{$word}++; + $pkgs_min{$word}++ if $_ eq ''; } } last if $postfixes->seq( $key, $prefixes, R_NEXT ) != 0; - last if $too_many_hits or keys %pkgs >= 100; + last if keys %pkgs_min >= 100; } - - my $no_results = keys %pkgs; - if ($too_many_hits || ($no_results >= 100)) { - $too_many_hits += $no_results; - %pkgs = ( $first_keyword => 1 ) unless @$keywords; + + my $nr = keys %pkgs; + my $min_nr = keys %pkgs_min; + debug("nr=$nr min_nr=$min_nr too_many_hits=$too_many_hits", 1) if DEBUG; + if ($nr >= 100) { + $too_many_hits += $nr - $min_nr + 1; + %pkgs = %pkgs_min; } foreach my $pkg (sort keys %pkgs) { &$read_entry( $packages, $pkg, $results, $non_results, $opts );