X-Git-Url: https://git.deb.at/?a=blobdiff_plain;ds=sidebyside;f=lib%2FPackages%2FSearch.pm;h=d3872672edeadfadc8c259d2546b4141631b7121;hb=e251bfc88a72b4836e8dea25baeb87990310d653;hp=b616f6f213de0b23392632af6b1be19ae4158810;hpb=dc9512b1309f4c8c6bd1171b543183e8ac8b2115;p=deb%2Fpackages.git
diff --git a/lib/Packages/Search.pm b/lib/Packages/Search.pm
index b616f6f..d387267 100644
--- a/lib/Packages/Search.pm
+++ b/lib/Packages/Search.pm
@@ -1,8 +1,8 @@
#
# Packages::Search
#
-# Copyright (C) 2004-2006 Frank Lichtenheld $index_line Found $no_results matching packages,";
- if ($end == $start) {
- print " displaying package $end.
Results per page: "; - my @resperpagelinks; - for (50, 100, 200) { - if ($opts->{number} == $_) { - push @resperpagelinks, $_; - } else { - push @resperpagelinks, resperpagelink($input,$opts,$_); - } - } - if ($opts->{number_all}) { - push @resperpagelinks, "all"; +sub read_entry_all { + my ($hash, $key, $results, $non_results, $opts) = @_; + my ($virt, $result) = split /\000/o, $hash->{$key} || "-\01-", 2; + + my %virt = split /\01/o, $virt; + while (my ($suite, $provides) = each %virt) { + next if $suite eq '-'; + if ($opts->{h_suites}{$suite}) { + push @$results, [ $key, "-", $suite, 'virtual', 'v', 'v', 'v', 'v', + $provides]; } else { - push @resperpagelinks, resperpagelink($input, $opts, "all"); + push @$non_results, [ $key, "-", $suite, 'virtual', 'v', 'v', 'v', 'v', + $provides]; } - print join( " | ", @resperpagelinks )."
"; } - return ( $start, $end ); -} -sub read_entry_all { - my ($hash, $key, $results, $non_results, $opts) = @_; - my $result = $hash->{$key} || ''; - foreach (split /\000/o, $result) { + foreach (split(/\000/o, $result||'')) { my @data = split ( /\s/o, $_, 8 ); - debug( "Considering entry ".join( ':', @data), 2); - if ($opts->{h_archives}{$data[0]} && $opts->{h_suites}{$data[1]} - && ($opts->{h_archs}{$data[2]} || $data[2] eq 'all' - || $data[2] eq 'virtual') - && ($opts->{h_sections}{$data[3]} || $data[3] eq '-')) { - debug( "Using entry ".join( ':', @data), 2); + debug( "Considering entry ".join( ':', @data), 2) if DEBUG; + if ($opts->{h_suites}{$data[1]} + && ($opts->{h_archs}{$data[2]} || $data[2] eq 'all') + && $opts->{h_sections}{$data[3]}) { + debug( "Using entry ".join( ':', @data), 2) if DEBUG; push @$results, [ $key, @data ]; } else { push @$non_results, [ $key, @data ]; @@ -361,35 +100,53 @@ sub read_entry { my @non_results; read_entry_all( $hash, $key, $results, \@non_results, $opts ); } + +#FIXME: make configurable +my %fallback_suites = ( + 'sarge-backports' => 'sarge', + 'sarge-volatile' => 'sarge', + 'etch-backports' => 'etch', + 'etch-volatile' => 'etch', + experimental => 'sid' ); + sub read_entry_simple { my ($hash, $key, $archives, $suite) = @_; - my $result = $hash->{$key} || ''; - my @data_fuzzy; + # FIXME: drop $archives + + my ($virt, $result) = split /\000/o, $hash->{$key} || "-\01-\0", 2; + my %virt = split /\01/o, $virt; + debug( "read_entry_simple: key=$key, archives=". + join(" ",(keys %$archives)).", suite=$suite", 1) if DEBUG; + debug( "read_entry_simple: virt=".join(" ",(%virt)), 2) if DEBUG; + # FIXME: not all of the 2^4=16 combinations of empty(results), + # empty(virt{suite}), empty(fb_result), empty(virt{fb_suite}) are dealt + # with correctly, but it's adequate enough for now + return [ $virt{$suite} ] unless defined $result; foreach (split /\000/o, $result) { my @data = split ( /\s/o, $_, 8 ); - debug( "Considering entry ".join( ':', @data), 2); - if ($data[1] eq $suite) { - if ($archives->{$data[0]}) { - debug( "Using entry ".join( ':', @data), 2); - return \@data; - } elsif ($data[0] eq 'us') { - debug( "Fuzzy entry ".join( ':', @data), 2); - @data_fuzzy = @data; - } - } + debug( "use entry: @data", 2 ) if DEBUG && $data[1] eq $suite; + return [ $virt{$suite}, @data ] if $data[1] eq $suite; } - return \@data_fuzzy; + if (my $fb_suite = $fallback_suites{$suite}) { + my $fb_result = read_entry_simple( $hash, $key, $archives, $fb_suite ); + my $fb_virt = shift(@$fb_result); + $virt{$suite} .= $virt{$suite} ? " $fb_virt" : $fb_virt if $fb_virt; + return [ $virt{$suite}, @$fb_result ] if @$fb_result; + } + return [ $virt{$suite} ]; } + sub read_src_entry_all { my ($hash, $key, $results, $non_results, $opts) = @_; my $result = $hash->{$key} || ''; + debug( "read_src_entry_all: key=$key", 1) if DEBUG; foreach (split /\000/o, $result) { my @data = split ( /\s/o, $_, 6 ); - debug( "Considering entry ".join( ':', @data), 2); + debug( "Considering entry ".join( ':', @data), 2) if DEBUG; if ($opts->{h_archives}{$data[0]} && $opts->{h_suites}{$data[1]} && $opts->{h_sections}{$data[2]}) { - debug( "Using entry ".join( ':', @data), 2); + debug( "Using entry ".join( ':', @data), 2) if DEBUG; push @$results, [ $key, @data ]; } else { push @$non_results, [ $key, @data ]; @@ -402,84 +159,130 @@ sub read_src_entry { read_src_entry_all( $hash, $key, $results, \@non_results, $opts ); } sub do_names_search { - my ($keyword, $packages, $postfixes, $read_entry, $opts) = @_; - my @results; + my ($keywords, $packages, $postfixes, $read_entry, $opts, + $results, $non_results) = @_; - $keyword = lc $keyword unless $opts->{case_bool}; + my $first_keyword = lc shift @$keywords; + @$keywords = map { lc $_ } @$keywords; - if ($opts->{exact}) { - &$read_entry( $packages, $keyword, \@results, $opts ); - } else { - my ($key, $prefixes) = ($keyword, ''); - my %pkgs; - $postfixes->seq( $key, $prefixes, R_CURSOR ); - while (index($key, $keyword) >= 0) { - if ($prefixes =~ /^\001(\d+)/o) { - $too_many_hits += $1; - } else { - foreach (split /\000/o, $prefixes) { - $_ = '' if $_ eq '^'; - debug( "add word $_$key", 2); - $pkgs{$_.$key}++; + my ($key, $prefixes) = ($first_keyword, ''); + my %pkgs; + $postfixes->seq( $key, $prefixes, R_CURSOR ); + while (index($key, $first_keyword) >= 0) { + if ($prefixes =~ /^\001(\d+)/o) { + debug( "$key has too many hits", 2 ) if DEBUG; + $too_many_hits += $1; + } else { + PREFIX: + foreach (split /\000/o, $prefixes) { + $_ = '' if $_ eq '^'; + my $word = "$_$key"; + foreach my $k (@$keywords) { + next PREFIX unless $word =~ /\Q$k\E/; } + debug( "add word $word", 2) if DEBUG; + $pkgs{$word}++; } - last if $postfixes->seq( $key, $prefixes, R_NEXT ) != 0; - last if $too_many_hits or keys %pkgs >= 100; - } - - my $no_results = keys %pkgs; - if ($too_many_hits || ($no_results >= 100)) { - $too_many_hits += $no_results; - %pkgs = ( $keyword => 1 ); - } - foreach my $pkg (sort keys %pkgs) { - &$read_entry( $packages, $pkg, \@results, $opts ); } + last if $postfixes->seq( $key, $prefixes, R_NEXT ) != 0; + last if $too_many_hits or keys %pkgs >= 100; + } + + my $no_results = keys %pkgs; + if ($too_many_hits || ($no_results >= 100)) { + $too_many_hits += $no_results; + %pkgs = ( $first_keyword => 1 ) unless @$keywords; + } + foreach my $pkg (sort keys %pkgs) { + &$read_entry( $packages, $pkg, $results, $non_results, $opts ); } - return \@results; } -sub do_fulltext_search { - my ($keyword, $file, $did2pkg, $packages, $read_entry, $opts) = @_; - my @results; - - my @lines; - my $regex; - if ($opts->{case_bool}) { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/o; - } else { - $regex = qr/\Q$keyword\E/o; - } - } else { - if ($opts->{exact}) { - $regex = qr/\b\Q$keyword\E\b/io; - } else { - $regex = qr/\Q$keyword\E/io; + +sub do_xapian_search { + my ($keywords, $dbpath, $did2pkg, $packages, $read_entry, $opts, + $results, $non_results) = @_; + +# NOTE: this needs to correspond with parse-packages! + my @tmp; + foreach my $keyword (@$keywords) { + $keyword =~ s;[^\w/+]+; ;og; + push @tmp, $keyword; + } + my $stemmer = Lingua::Stem->new(); + my $stemmed_keywords = $stemmer->stem( @tmp ); + + my $db = Search::Xapian::Database->new( $dbpath ); + my $enq = $db->enquire( OP_OR, @$keywords, @$stemmed_keywords ); + debug( "Xapian Query was: ".$enq->get_query()->get_description(), 1) if DEBUG; + my @matches = $enq->matches(0, 999); + + my (@order, %tmp_results); + foreach my $match ( @matches ) { + my $id = $match->get_docid(); + my $result = $did2pkg->{$id}; + + foreach (split /\000/o, $result) { + my @data = split /\s/, $_, 3; + debug ("Considering $data[0], arch = $data[2], relevance=".$match->get_percent(), 3) if DEBUG; +# next unless $data[2] eq 'all' || $opts->{h_archs}{$data[2]}; +# debug ("Ok", 3) if DEBUG; + unless ($tmp_results{$data[0]}++) { + push @order, $data[0]; + } } + last if @order > 100; + } + undef $db; + $too_many_hits++ if @order > 100; + + debug ("ORDER: @order", 2) if DEBUG; + foreach my $pkg (@order) { + &$read_entry( $packages, $pkg, $results, $non_results, $opts ); } +} + +sub find_similar { + my ($pkg, $dbpath, $did2pkg) = @_; + + my $db = Search::Xapian::Database->new( $dbpath ); + my $enq = $db->enquire( "P$pkg" ); + debug( "Xapian Query was: ".$enq->get_query()->get_description(), 1) if DEBUG; + my $first_match = ($enq->matches(0,1))[0]->get_document(); + + my @terms; + my $term_it = $first_match->termlist_begin(); + my $term_end = $first_match->termlist_end(); - open DESC, '<', "$file" - or die "couldn't open $file: $!"; - while (