From 41ab743eae0a7875cf03a8b50a060da06af30b39 Mon Sep 17 00:00:00 2001 From: Frank Lichtenheld Date: Sat, 2 Dec 2006 00:13:13 +0000 Subject: [PATCH] Use the debtags vocabulary to be able to show prettier tag names --- bin/parse-debtags-voc | 131 ++++++++++++++++++++++++++++++++++ cron.d/110debtags | 15 ++++ lib/Packages/DB.pm | 11 +-- lib/Packages/DoShow.pm | 2 +- templates/html/foot.tmpl | 2 +- templates/html/head.tmpl | 3 + templates/html/newpkg.tmpl | 2 +- templates/html/show.tmpl | 6 +- templates/html/tag_index.tmpl | 23 ++++++ 9 files changed, 187 insertions(+), 8 deletions(-) create mode 100755 bin/parse-debtags-voc create mode 100755 cron.d/110debtags create mode 100644 templates/html/tag_index.tmpl diff --git a/bin/parse-debtags-voc b/bin/parse-debtags-voc new file mode 100755 index 0000000..ebd6547 --- /dev/null +++ b/bin/parse-debtags-voc @@ -0,0 +1,131 @@ +#!/usr/bin/perl -w +# Convert Debtags vocabulary.gz files into Sleepycat db files +# +# $Id: parse-packages 227 2006-11-12 20:24:48Z djpig $ +# +# Copyright (C) 2006 Frank Lichtenheld +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +use strict; +use warnings; +use lib './lib'; + +$| = 1; + +use DB_File; +use File::Path; +use Data::Dumper; +use HTML::Entities; +use URI::Escape; + +use Deb::Versions; +use Packages::Template; +use Packages::Config qw( $TOPDIR ); +use Packages::CGI; +&Packages::Config::init( './' ); +my $debtagsdir = "$TOPDIR/files/debtags"; +my $wwwdir = "$TOPDIR/www/about"; +my $voc_file = "$debtagsdir/vocabulary"; +my (%voc, %voc_db); + +$/ = ""; + +delete $ENV{'LANGUAGE'}; +delete $ENV{'LANG'}; +delete $ENV{'LC_ALL'}; +delete $ENV{'LC_MESSAGES'}; + +print "Parsing Vocabulary...\n"; +tie %voc_db, "DB_File", "$debtagsdir/vocabulary.db.new", + O_RDWR|O_CREAT, 0666, $DB_BTREE + or die "Error creating DB: $!"; +open VOC, '<', $voc_file or die "Error opening vocabulary: $!"; + +while () { + next if /^\s*$/; + my $data = ""; + my %data = (); + chomp; + s/\n /\377/g; + while (/^(\S+):\s*(.*)\s*$/mg) { + my ($key, $value) = ($1, $2); + $value =~ s/\377/\n /g; + $key =~ tr [A-Z] [a-z]; + $data{$key} = $value; + } + my $voc_key = $data{facet} || $data{tag}; + unless ($voc_key) { + warn "No key found in ".Dumper(\%data); + next; + } + if ($voc{$voc_key}) { + warn "Duplicated key found: $voc_key\n"; + next; + } + my ($sdesc,$ldesc) = split /\n/, encode_entities($data{description}), 2; + + if ($ldesc) { + $ldesc =~ s,((ftp|http|https)://[\S~-]+?/?)((\>\;)?[)]?[']?[:.\,]?(\s|$)),$1$3,go; # syntax highlighting -> ']; + $ldesc =~ s/\A //o; + $ldesc =~ s/\n /\n/sgo; + $ldesc =~ s/\n.\n/\n

\n/go; + $ldesc =~ s/(((\n|\A) [^\n]*)+)/\n

$1\n<\/pre>/sgo;
+    }
+    $data{html_description} = [ $sdesc, $ldesc||"" ];
+
+    $voc_db{$voc_key} = $sdesc || "";
+    $voc{$voc_key} = \%data;
+}
+
+close VOC or warn "Couldn't close vocabulary: $!";
+
+#print Dumper(\%voc,\%voc_db);
+
+print "Creating tag list...\n";
+# Render the static tag overview page from the parsed vocabulary.
+-d $wwwdir || mkpath( $wwwdir );
+open my $taglst, '>', "$wwwdir/debtags.en.html.new"
+    or die "Error creating tag list: $!";
+
+my $template = Packages::Template->new( "$TOPDIR/templates", 'html', {} );
+my @facets = sort( grep { exists $voc{$_}{facet} } keys %voc );
+my @tags = sort( grep { exists $voc{$_}{tag} } keys %voc );
+my %tags_by_facet;
+foreach my $full_tag (@tags) {
+    my ($facet) = split /::/, $full_tag, 2; # part before the first "::"
+    warn "No facet data available for $facet\n"
+	unless exists $voc{$facet};
+    push @{$tags_by_facet{$facet}}, $full_tag;
+}
+my %content = ( vocabulary => \%voc,
+		facets => \@facets, tags => \@tags,
+		tags_by_facet => \%tags_by_facet );
+# needed to work around the limitations of the FILTER syntax
+$content{html_encode} = sub { return HTML::Entities::encode_entities(@_,'<>&"') };
+$content{uri_escape} = sub { return URI::Escape::uri_escape(@_) };
+$content{quotemeta} = sub { return quotemeta($_[0]) };
+$content{string2id} = sub { return &Packages::CGI::string2id(@_) };
+
+print {$taglst} $template->page( 'tag_index', \%content );
+print {$taglst} $template->trailer();
+# Only publish the page if it was written out completely.
+if (close $taglst) {
+    rename( "$wwwdir/debtags.en.html.new", "$wwwdir/debtags.en.html" )
+	or warn "Couldn't install tag list: $!";
+} else { warn "Couldn't close tag list: $!"; }
+# Release the tie before moving the new DB into place.
+untie %voc_db;
+rename( "$debtagsdir/vocabulary.db.new", "$debtagsdir/vocabulary.db" )
+    or die "Couldn't install vocabulary DB: $!";
diff --git a/cron.d/110debtags b/cron.d/110debtags
new file mode 100755
index 0000000..7d361fc
--- /dev/null
+++ b/cron.d/110debtags
@@ -0,0 +1,15 @@
+#! /bin/bash
+# Fetch the Debtags vocabulary and regenerate the data derived from it.
+. "$(dirname "$0")/../config.sh"
+
+debtagsdir="$filesdir/debtags"
+
+test -d "${debtagsdir}" || mkdir -p "${debtagsdir}"
+cd "${debtagsdir}" || exit 1
+# If the download fails, gunzip is skipped and any existing file is kept.
+wget -q -N http://debtags.alioth.debian.org/tags/vocabulary.gz &&
+	gunzip -f vocabulary.gz
+
+cd "$topdir" || exit 1
+# The parser is invoked by relative path, so it must start from $topdir.
+./bin/parse-debtags-voc
diff --git a/lib/Packages/DB.pm b/lib/Packages/DB.pm
index 6d5db62..7d4f55f 100644
--- a/lib/Packages/DB.pm
+++ b/lib/Packages/DB.pm
@@ -6,14 +6,14 @@ use warnings;
 use Exporter;
 use DB_File;
 use Packages::CGI;
-use Packages::Config qw( $DBDIR );
+use Packages::Config qw( $TOPDIR $DBDIR );
 
 our @ISA = qw( Exporter );
 our ( %packages, %sources, %src2bin, %did2pkg, %descriptions,
-      %postf, %spostf,
+      %postf, %spostf, %debtags,
       $obj, $s_obj, $p_obj, $sp_obj );
 our @EXPORT = qw( %packages %sources %src2bin %did2pkg %descriptions
-		  %postf %spostf
+		  %postf %spostf %debtags
 		  $obj $s_obj $p_obj $sp_obj );
 our $db_read_time ||= 0;
 
@@ -35,6 +35,9 @@ sub init {
 	tie %did2pkg, 'DB_File', "$DBDIR/descriptions_packages.db",
 	O_RDONLY, 0666, $DB_BTREE
 	    or die "couldn't tie DB $DBDIR/descriptions_packages.db: $!";
+	tie %debtags, 'DB_File', "$TOPDIR/files/debtags/vocabulary.db",
+	O_RDONLY, 0666, $DB_BTREE
+	    or die "couldn't tie DB $TOPDIR/files/debtags/vocabulary.db: $!";
 	$p_obj = tie %postf, 'DB_File', "$DBDIR/package_postfixes.db",
 	O_RDONLY, 0666, $DB_BTREE
 	    or die "couldn't tie postfix db $DBDIR/package_postfixes.db: $!";
@@ -42,7 +45,7 @@ sub init {
 	O_RDONLY, 0666, $DB_BTREE
 	    or die "couldn't tie postfix db $DBDIR/source_postfixes.db: $!";
 
-    	debug( "tied databases ($dbmodtime > $db_read_time)" ) if DEBUG;
+	debug( "tied databases ($dbmodtime > $db_read_time)" ) if DEBUG;
 	$db_read_time = $dbmodtime;
     }
 }
diff --git a/lib/Packages/DoShow.pm b/lib/Packages/DoShow.pm
index d6502ab..21bd985 100644
--- a/lib/Packages/DoShow.pm
+++ b/lib/Packages/DoShow.pm
@@ -119,7 +119,7 @@ sub do_show {
 			debug( "Data search and merging took ".timestr($std) ) if DEBUG;
 
 			my $did = $page->get_newest( 'description' );
-			my @tags = split(/, /, $page->get_newest( 'tag' ));
+			my @tags = map { [ $_, $debtags{$_} ] } split(/, /, $page->get_newest( 'tag' ));
 			$contents{tags} = \@tags;
 
 			$section = $page->get_newest( 'section' );
diff --git a/templates/html/foot.tmpl b/templates/html/foot.tmpl
index 22961ca..28bd34f 100644
--- a/templates/html/foot.tmpl
+++ b/templates/html/foot.tmpl
@@ -20,7 +20,7 @@ Total page evaluation took [% benchmark %]
[% END %]
-

0 %]>Back to: [% organisation %] homepage || Packages search page

+

0 %]>Back to: [% organisation %] homepage || Packages search page


diff --git a/templates/html/head.tmpl b/templates/html/head.tmpl index bb752f8..74d8b4a 100644 --- a/templates/html/head.tmpl +++ b/templates/html/head.tmpl @@ -11,6 +11,9 @@ +[% IF rss_alternate %] + +[% END %] diff --git a/templates/html/newpkg.tmpl b/templates/html/newpkg.tmpl index 0586a6e..cfa6bd3 100644 --- a/templates/html/newpkg.tmpl +++ b/templates/html/newpkg.tmpl @@ -2,7 +2,7 @@ title_tag = "New Packages in \"$suite\"" description = desc keywords = "$suite, new packages, $section" - html_meta = '' + rss_alternate = 'newpkg?format=rss' -%]

New Packages in "[% suite %]"

diff --git a/templates/html/show.tmpl b/templates/html/show.tmpl index 43e9b60..43e5ca1 100644 --- a/templates/html/show.tmpl +++ b/templates/html/show.tmpl @@ -146,7 +146,11 @@ Do not install it on a normal Debian system.

Tags: [%- END %] - [% tag %][% ', ' UNLESS loop.last %] + [% IF tag.1 %] + [% tag.1 %][% ', ' UNLESS loop.last %] + [% ELSE %] + [% tag.0 %][% ', ' UNLESS loop.last %] + [% END %] [% IF loop.last -%]

diff --git a/templates/html/tag_index.tmpl b/templates/html/tag_index.tmpl new file mode 100644 index 0000000..df51e22 --- /dev/null +++ b/templates/html/tag_index.tmpl @@ -0,0 +1,23 @@ +[% PROCESS 'html/head.tmpl' + title_tag = "Overview of available Debian Package Tags" + keywords = debtags + navigation = [ { name=>'About', url=>"/about/" }, + { name=>'Debtags' } ] +%] +

Overview of available Debian Package Tags

+ +[% FOREACH facet IN facets %] +

Facet: [% facet %]

+

[% vocabulary.$facet.html_description.0 %]

+

[% vocabulary.$facet.html_description.1 %] + + [% FOREACH tag IN tags_by_facet.$facet %] + [% '

' IF loop.first %] +
[% tag %]
+

[% vocabulary.$tag.html_description.0 %] +

[% vocabulary.$tag.html_description.1 %] +

+ [% '
' IF loop.last %] + [% END %] + +[% END %] -- 2.39.2