X-Git-Url: https://git.deb.at/?a=blobdiff_plain;f=bin%2Fparse-translations;h=095820421b1871347712295016b770f1b561434a;hb=5b4300c691bdea12227a957d4aa7b738e30d4d0d;hp=436e435d1519ab77942ad66b77e1b1ec98c952db;hpb=36cd772d5715368c75f6aa7bef3dc526aa876a76;p=deb%2Fpackages.git diff --git a/bin/parse-translations b/bin/parse-translations index 436e435..0958204 100755 --- a/bin/parse-translations +++ b/bin/parse-translations @@ -2,9 +2,8 @@ # Convert Translation.gz files into Sleepycat db files for efficient usage of # data # -# $Id$ -# # Copyright (C) 2006 Jeroen van Wolffelaar +# Copyright (C) 2007 Frank Lichtenheld # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or @@ -17,7 +16,7 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. use strict; use warnings; @@ -32,6 +31,7 @@ use DB_File; use Storable; use File::Path; use Digest::MD5; +use Text::Iconv; use Deb::Versions; use Lingua::Stem v0.82; use Search::Xapian; @@ -43,6 +43,8 @@ $/ = ""; -d $DBDIR || mkpath( $DBDIR ); +my $fixja = Text::Iconv->new("EUC-JP", "UTF-8"); + foreach my $lang (@DDTP_LANGUAGES) { print "Reading Translations for $lang..."; open PKG, "zcat $TOPDIR/archive/*/*/*/i18n/Translation-$lang.gz|"; @@ -63,6 +65,10 @@ foreach my $lang (@DDTP_LANGUAGES) { next if exists($descriptions{$data{"description-md5"}}{$lang}); # some weirdnesses in the files next unless defined $data{"description-".lc($lang)}; + if ($lang eq 'ja') { + my $fixed = $fixja->convert($data{"description-ja"}); + $data{"description-ja"} = $fixed if $fixed; + } $descriptions{$data{"description-md5"}}{$lang} = $data{"description-".lc($lang)}; $count++; }