#!/usr/bin/perl -w # Convert Debtags vocabulary.gz files into Sleepycat db files # # Copyright (C) 2006 Frank Lichtenheld # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. use strict; use warnings; use lib './lib'; $| = 1; use POSIX; use DB_File; use File::Path; use Data::Dumper; use HTML::Entities; use URI::Escape; use Deb::Versions; use Packages::Template; use Packages::Config qw( $TOPDIR @LANGUAGES ); use Packages::I18N::Locale; &Packages::Config::init( './' ); my $debtagsdir = "$TOPDIR/files/debtags"; my $wwwdir = "$TOPDIR/www/about"; my $voc_file = "$debtagsdir/vocabulary"; my (%voc, %voc_db); $/ = ""; sub process_desc { my ($desc) = @_; if ($desc) { $desc = encode_entities($desc); $desc =~ s,((ftp|http|https)://[\S~-]+?/?)((\>\;)?[)]?[']?[:.\,]?(\s|$)),$1$3,go; # syntax highlighting -> ']; $desc =~ s/\A //o; $desc =~ s/\n /\n/sgo; $desc =~ s/\n.\n/\n

\n/go; $desc =~ s/(((\n|\A) [^\n]*)+)/\n

$1\n<\/pre>/sgo;
    }
    return $desc;
}

print "Parsing Vocabulary...\n";
tie %voc_db, "DB_File", "$debtagsdir/vocabulary.db.new",
    O_RDWR|O_CREAT, 0666, $DB_BTREE
    or die "Error creating DB: $!";
open VOC, '<', $voc_file or die "Error opening vocabulary: $!";

while () {
    next if /^\s*$/;
    my $data = "";
    my %data = ();
    chomp;
    s/\n /\377/g;
    while (/^(\S+):\s*(.*)\s*$/mg) {
	my ($key, $value) = ($1, $2);
	$value =~ s/\377/\n /g;
	$key =~ tr [A-Z] [a-z];
	$data{$key} = $value;
    }
    my $voc_key = $data{facet} || $data{tag};
    unless ($voc_key) {
	warn "No key found in ".Dumper(\%data);
	next;
    }
    if ($voc{$voc_key}) {
	warn "Duplicated key found: $voc_key\n";
	next;
    }
    my ($sdesc,$ldesc) = split /\n/, $data{description}, 2;

    $data{html_description} = [ encode_entities($sdesc), process_desc($ldesc)||"" ];
    $voc_db{$voc_key} = $sdesc || "";

    foreach my $lang (@LANGUAGES) {
	next if $lang eq 'en';

	my $cat = Packages::I18N::Locale->get_handle($lang)
	    or die "get_handle failed for $lang";

	my $sdesc_trans = $cat->maketext($sdesc);
	$voc_db{"$voc_key-$lang"} = $sdesc_trans
	    if $sdesc_trans and $sdesc_trans ne $sdesc;
    }

    $voc{$voc_key} = \%data;
}

close VOC or warn "Couldn't close vocabulary: $!";

#print Dumper(\%voc,\%voc_db);

print "Creating tag list...\n";

-d $wwwdir || mkpath( $wwwdir );
open TAGLST, '>', "$wwwdir/debtags.en.html.new"
    or die "Error creating tag list: $!";

my $template = new Packages::Template( "$TOPDIR/templates", 'html', {} );
my @facets = sort( grep { exists $voc{$_}{facet} } keys %voc );
my @tags = sort( grep { exists $voc{$_}{tag} } keys %voc );
my %tags_by_facet;
foreach (@tags) {
    my ($facet, $tag) = split m/::/, $_, 2;
    warn "No facet data available for $facet\n"
	unless exists $voc{$facet};
    $tags_by_facet{$facet} ||= [];
    push @{$tags_by_facet{$facet}}, $_;
}
my %content = ( vocabulary => \%voc,
		facets => \@facets, tags => \@tags,
		tags_by_facet => \%tags_by_facet,
		used_langs => [ 'en' ]);
print TAGLST $template->page( 'tag_index', \%content );
close TAGLST or warn "Couldn't close tag list: $!";

rename( "$wwwdir/debtags.en.html.new",
	"$wwwdir/debtags.en.html" );

untie %voc_db;
rename( "$debtagsdir/vocabulary.db.new",
	"$debtagsdir/vocabulary.db" );