From 0d8eec4fee0812400e3b65560f4df8eca9e1a448 Mon Sep 17 00:00:00 2001
From: Frank Lichtenheld
Date: Sun, 17 Jun 2007 18:48:44 +0200
Subject: [PATCH] parse-translations: Fix encoding of Japanese descriptions

They should be in UTF-8, but actually they are in EUC-JP.
---
 bin/parse-translations | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/bin/parse-translations b/bin/parse-translations
index 436e435..fa9855d 100755
--- a/bin/parse-translations
+++ b/bin/parse-translations
@@ -32,6 +32,7 @@ use DB_File;
 use Storable;
 use File::Path;
 use Digest::MD5;
+use Text::Iconv;
 use Deb::Versions;
 use Lingua::Stem v0.82;
 use Search::Xapian;
@@ -43,6 +44,8 @@ $/ = "";
 
 -d $DBDIR || mkpath( $DBDIR );
 
+my $fixja = Text::Iconv->new("EUC-JP", "UTF-8");
+
 foreach my $lang (@DDTP_LANGUAGES) {
     print "Reading Translations for $lang...";
     open PKG, "zcat $TOPDIR/archive/*/*/*/i18n/Translation-$lang.gz|";
@@ -63,6 +66,10 @@ foreach my $lang (@DDTP_LANGUAGES) {
         next if exists($descriptions{$data{"description-md5"}}{$lang});
         # some weirdnesses in the files
         next unless defined $data{"description-".lc($lang)};
+        if ($lang eq 'ja') {
+            my $fixed = $fixja->convert($data{"description-ja"});
+            $data{"description-ja"} = $fixed if $fixed;
+        }
         $descriptions{$data{"description-md5"}}{$lang} = $data{"description-".lc($lang)};
         $count++;
     }
-- 
2.39.2