or die "Error creating DB: $!";
print "Index $#descriptions descriptions\n";
for (my $i=1; $i<= $#descriptions; $i++) {
- my $plain_description = $descriptions[$i];
# strip away additional data
- my ($only_desc) = split /\000/o, $plain_description, 2;
+ my ($only_desc, $pkg, $tags) = split /\000/o, $descriptions[$i], 3;
# WARNING: This needs to correspond with what happens in
-# Packages/Search.pm:do_fulltext_search
- $plain_description =~ tr [A-Z] [a-z];
- # ensure one space on both ends
- $plain_description = " $plain_description ";
- $plain_description =~ s/[(),.-]+//og;
- $plain_description =~ s#[^a-z0-9_/+]+# #og;
+# Packages/Search.pm:do_xapian_search
+ $only_desc =~ s#[^\w/+]+# #og;
#XAPIAN
eval {
- my @words = split /\s+/, $plain_description;
- my $stem_words = $stemmer->stem( \@words );
+ my @words = split /\s+/, $only_desc;
+ unshift @words, $pkg; # the package name becomes word 0, so it also gets a posting and a stemmed term below
+
my $doc = Search::Xapian::Document->new()
or die "can't create doc object for $i: $!\n";
if ($doc->set_data($i)){
warn "can't set_data in doc object for $i: $!\n";
}
- for my $j (0 .. (@$stem_words-1)) {
- next if $stem_words->[$j] =~ /^\s*$/o;
- if ($doc->add_posting($stem_words->[$j], $j)) {
- warn "can't add word $stem_words->[$j] $j: $!\n";
+
+ # package name, added as a term with a literal "P" prefix
+ if ($doc->add_term("P$pkg")) {
+ warn "can't add term P$pkg: $!\n";
+ }
+ # description words, unstemmed -- the index $j in @words is passed as the position
+ for my $j (0 .. (@words-1)) {
+ next if $words[$j] =~ /^\s*$/o; # skip empty or whitespace-only entries
+ if ($doc->add_posting($words[$j], $j)) {
+ warn "can't add posting $words[$j] at $j: $!\n";
}
}
+ # the same words after $stemmer->stem, added as terms without a position
+ my $stem_words = $stemmer->stem( \@words );
+ foreach my $w (@$stem_words) {
+ next if $w =~ /^\s*$/o;
+ if ($doc->add_term($w)) {
+ warn "can't add term $w: $!\n";
+ }
+ }
+ if ($tags) { # skipped when $tags is undefined or empty
+ foreach my $t (split /, /, $tags) { # tags are separated by a comma followed by a space
+ if ($doc->add_term($t)) {
+ warn "can't add term $t: $!\n";
+ }
+ }
+ }
+
$xapian_db->add_document($doc)
or warn "failed to add document: $i\n";
};
if (@results) {
my (%pkgs, %subsect, %sect, %archives, %desc, %binaries, %provided_by);
+ my %sort_by_relevance; # first field of each result => its 1-based position in @results
+ for (1 ... scalar @results) {
+# debug("$results[$_-1][0] => $_", 4) if DEBUG;
+ $sort_by_relevance{$results[$_-1][0]} = $_; # a key that occurs again keeps the later position
+ }
+# use Data::Dumper;
+# debug( "sort_by_relevance=".Dumper(\%sort_by_relevance), 4);
+
unless ($opts->{source}) {
foreach (@results) {
my ($pkg_t, $archive, $suite, $arch, $section, $subsection,
}
my %uniq_pkgs = map { $_ => 1 } (keys %pkgs, keys %provided_by);
- my @pkgs = sort keys %uniq_pkgs;
+ my @pkgs;
+ if ($searchon eq 'names') { # name search: default string sort of the keys
+ @pkgs = sort keys %uniq_pkgs;
+ } else { # any other search: ascending position stored in %sort_by_relevance
+ @pkgs = sort { $sort_by_relevance{$a} <=> $sort_by_relevance{$b} } keys %uniq_pkgs;
+ }
process_packages( $page_content, 'packages', \%pkgs, \@pkgs, $opts, \@keywords,
\&process_package, \%provided_by,
\%archives, \%sect, \%subsect,
# NOTE: this needs to correspond with parse-packages!
my @tmp;
foreach my $keyword (@$keywords) {
- $keyword =~ tr [A-Z] [a-z];
- if ($opts->{exact}) {
- $keyword = " $keyword ";
- }
- $keyword =~ s/[(),.-]+//og;
- $keyword =~ s;[^a-z0-9_/+]+; ;og;
+ $keyword =~ s;[^\w/+]+; ;og; # each run of characters other than \w, "/" and "+" becomes one space -- $keyword aliases the element of @$keywords, so that entry is changed too
push @tmp, $keyword;
}
my $stemmer = Lingua::Stem->new();
- $keywords = $stemmer->stem( @tmp );
+ my $stemmed_keywords = $stemmer->stem( @tmp ); # separate variable, so @$keywords keeps the unstemmed forms for the query
my $db = Search::Xapian::Database->new( $db );
- my $enq = $db->enquire( OP_AND, @$keywords );
+ my $enq = $db->enquire( OP_OR, @$keywords, @$stemmed_keywords ); # OR over both the unstemmed and the stemmed keywords
debug( "Xapian Query was: ".$enq->get_query()->get_description(), 1) if DEBUG;
- my @matches = $enq->matches(0, 100);
+ my @matches = $enq->matches(0, 999); # presumably (start offset, max. number of matches) -- confirm against Search::Xapian::Enquire
- my $numres = 0;
- my %tmp_results;
+ my (@order, %tmp_results); # @order: each $data[0] once, in first-seen order -- %tmp_results: how often it was seen
foreach my $match ( @matches ) {
my $id = $match->get_docid();
my $result = $did2pkg->{$id};
foreach (split /\000/o, $result) {
my @data = split /\s/, $_, 3;
-# debug ("Considering $data[0], arch = $data[2]", 3) if DEBUG;
+ debug ("Considering $data[0], arch = $data[2], relevance=".$match->get_percent(), 3) if DEBUG;
# next unless $data[2] eq 'all' || $opts->{h_archs}{$data[2]};
# debug ("Ok", 3) if DEBUG;
- $numres++ unless $tmp_results{$data[0]}++;
+ unless ($tmp_results{$data[0]}++) { # true only the first time this $data[0] is seen
+ push @order, $data[0];
+ }
}
- last if $numres > 100;
+ last if @order > 100; # stop once more than 100 distinct entries are collected
}
undef $db;
- $too_many_hits++ if $numres > 100;
+ $too_many_hits++ if @order > 100;
- foreach my $pkg (keys %tmp_results) {
+ debug ("ORDER: @order", 2) if DEBUG;
+ foreach my $pkg (@order) { # visit the entries in the order they were first seen in @matches
&$read_entry( $packages, $pkg, $results, $non_results, $opts );
}
}