X-Git-Url: https://git.deb.at/?p=deb%2Fpackages.git;a=blobdiff_plain;f=bin%2Fparse-translations;h=d079b7f99634425aa548b45a71afcae65d34d34e;hp=436e435d1519ab77942ad66b77e1b1ec98c952db;hb=1deb5ef4f37a6ccea16346836a7097a43a257978;hpb=36cd772d5715368c75f6aa7bef3dc526aa876a76 diff --git a/bin/parse-translations b/bin/parse-translations index 436e435..d079b7f 100755 --- a/bin/parse-translations +++ b/bin/parse-translations @@ -2,9 +2,8 @@ # Convert Translation.gz files into Sleepycat db files for efficient usage of # data # -# $Id$ -# # Copyright (C) 2006 Jeroen van Wolffelaar +# Copyright (C) 2007 Frank Lichtenheld # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or @@ -17,7 +16,7 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. use strict; use warnings; @@ -32,6 +31,7 @@ use DB_File; use Storable; use File::Path; use Digest::MD5; +use Text::Iconv; use Deb::Versions; use Lingua::Stem v0.82; use Search::Xapian; @@ -43,10 +43,19 @@ $/ = ""; -d $DBDIR || mkpath( $DBDIR ); +my $fixja = Text::Iconv->new("EUC-JP", "UTF-8"); + +# FIXME: one database per dist +# http://lists.debian.org/4E42E104.90201@deb-support.de +# FIXME: unhardcode dists name +my @dists = ('sid', 'wheezy', 'squeeze', 'lenny'); + foreach my $lang (@DDTP_LANGUAGES) { - print "Reading Translations for $lang..."; - open PKG, "zcat $TOPDIR/archive/*/*/*/i18n/Translation-$lang.gz|"; + (my $locale = $lang) =~ s/^([a-z]{2})-([a-z]{2})$/"$1_".uc($2)/e; + print "Reading Translations for $lang ($locale)..."; my $count = 0; + foreach my $dist (@dists) { + open PKG, "bzcat $TOPDIR/archive/*/$dist/*/i18n/Translation-$locale.bz2|"; while () { next if /^\s*$/; my $data = ""; @@ -62,10 +71,16 @@ foreach my $lang (@DDTP_LANGUAGES) { # Skip double descriptions next if exists($descriptions{$data{"description-md5"}}{$lang}); # some weirdnesses in the files - next unless defined $data{"description-".lc($lang)}; - $descriptions{$data{"description-md5"}}{$lang} = $data{"description-".lc($lang)}; + next unless defined $data{"description-".lc($locale)}; + if ($lang eq 'ja') { + my $fixed = $fixja->convert($data{"description-ja"}); + $data{"description-ja"} = $fixed if $fixed; + } + $descriptions{$data{"description-md5"}}{$lang} = + $data{"description-".lc($locale)}; $count++; } + } print "($count)\n"; } close PKG;