Skip to content

Instantly share code, notes, and snippets.

@osamu2001
Created June 12, 2009 14:15
Show Gist options
  • Save osamu2001/128653 to your computer and use it in GitHub Desktop.
Save osamu2001/128653 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Encode;
# ---------------------------------------------------------------------------
# Main program: read a gettext PO catalogue from STDIN and write it to STDOUT
# as a TMX translation memory.
#
# Input lines are skipped until one containing "charset=NAME" is seen
# (normally the Content-Type line of the PO header); every later line is
# decoded from that charset.  Each entry that ends up with both a non-empty
# msgid and a non-empty msgstr is written as one translation unit.  An entry
# is considered finished at a blank line, at the next "msgid" line, or at end
# of input.
# ---------------------------------------------------------------------------
my $fcode  = "";    # charset named in the PO header; "" until it has been seen
my $mode   = "";    # field that continuation lines extend: "msgid", "msgstr" or ""
my $msgid  = "";    # source text of the entry being collected (still PO-escaped)
my $msgstr = "";    # translation of the entry being collected (still PO-escaped)

# Replace the backslash escapes \n, \t, \r, \" and \\ of a PO string with the
# characters they stand for.  One left-to-right pass is used so that an
# escaped backslash followed by "n" is not mistaken for a newline escape.
# Any other backslash sequence is left untouched.
sub _po_unescape {
    my ($text) = @_;
    my %char_for = (
        'n'  => "\n",
        't'  => "\t",
        'r'  => "\r",
        '"'  => '"',
        '\\' => '\\',
    );
    $text =~ s/\\([ntr"\\])/$char_for{$1}/g;
    return $text;
}

# Write the entry collected so far as one translation unit (only when both
# the source and the translation are non-empty), then clear the parser state
# so the same entry can never be written twice or leak into the next one.
sub _flush_entry {
    if (length($msgid) && length($msgstr)) {
        print_tuv(
            HTMLSanitize(_po_unescape($msgid)),
            HTMLSanitize(_po_unescape($msgstr)),
        );
    }
    ($mode, $msgid, $msgstr) = ("", "", "");
    return;
}

# NOTE: print_tmx_header/print_tmx_footer are declared with an empty prototype
# further down the file.  They are called as &name() on purpose: a plain
# name() call compiled before that declaration would raise a
# "called too early to check prototype" warning.
&print_tmx_header();

LINE:
while (my $raw = <STDIN>) {

    # Nothing can be decoded until the header has told us the charset.
    if ($fcode eq "") {
        # '_', '.', ':' and '+' are legal in charset names (e.g. Shift_JIS).
        if ($raw =~ /charset=([-A-Za-z0-9_.:+]+)/) {
            my $charset = $1;
            die "Unsupported charset '$charset' in PO header\n"
                unless find_encoding($charset);
            $fcode = $charset;
        }
        next LINE;
    }

    my $line = decode($fcode, $raw);
    $line =~ s/\r?\n\z//;    # accept both LF and CRLF line endings

    # A blank line terminates the current entry.
    if ($line =~ /^\s*$/) {
        _flush_entry();
        next LINE;
    }

    # A bare quoted string continues whichever field was started last.
    if ($mode ne "" && $line =~ /^\s*"(.*)"\s*$/) {
        my $chunk = $1;
        if ($mode eq "msgid") {
            $msgid .= $chunk;
        }
        else {
            $msgstr .= $chunk;
        }
        next LINE;
    }

    if ($line =~ /^msgid\s+"(.*)"\s*$/) {
        my $text = $1;
        _flush_entry();      # entries need not be separated by a blank line
        $mode  = "msgid";
        $msgid = $text;
        next LINE;
    }

    if ($line =~ /^msgstr\s+"(.*)"\s*$/) {
        $mode   = "msgstr";
        $msgstr = $1;
        next LINE;
    }

    # Any other line (comment, msgctxt, msgid_plural, msgstr[N], ...) ends the
    # current field, so its continuation strings are not appended to the
    # wrong text.
    $mode = "";
}

# The last entry is not necessarily followed by a blank line.
_flush_entry();

warn "No charset= declaration found on STDIN; nothing was converted\n"
    if $fcode eq "";

&print_tmx_footer();
# Print one complete translation unit.
#
# Arguments:
#   1. text placed in the <seg> of the variant opened by print_tuv_en_b
#   2. text placed in the <seg> of the variant opened by print_tuv_ja_b
#
# Both texts are encoded to UTF-8 octets and printed as given; no XML
# escaping is applied here.  The helper subs are called as &name() because
# they are declared (with an empty prototype) later in the file.
sub print_tuv {
    my $source_text = shift;
    my $target_text = shift;

    &print_tuv_en_b();
    print encode("utf8", $source_text);
    &print_tuv_en_e();

    &print_tuv_ja_b();
    print encode("utf8", $target_text);
    return &print_tuv_ja_e();
}
# Begin a translation unit: print the opening <tu> tag and the opening
# <tuv lang="EN-US"> tag (one per line), then an opening <seg> tag with no
# trailing newline so the segment text can follow it directly.
sub print_tuv_en_b() {
    my @opening_tags = ('<tu>', '<tuv lang="EN-US">');
    print join('', map { "$_\n" } @opening_tags);
    print '<seg>';
}
# Print the closing </seg> and </tuv> tags, each followed by a newline.
# Counterpart of print_tuv_en_b.
sub print_tuv_en_e() {
    my @closing_tags = ('</seg>', '</tuv>');
    print join('', map { "$_\n" } @closing_tags);
}
# Print the opening <tuv lang="JA"> tag on its own line, then an opening
# <seg> tag with no trailing newline so the segment text can follow it
# directly.
sub print_tuv_ja_b() {
    my $variant_open = '<tuv lang="JA">' . "\n";
    print $variant_open;
    print '<seg>';
}
# Print the closing </seg>, </tuv> and </tu> tags, each followed by a
# newline, which ends the translation unit.  Counterpart of print_tuv_ja_b.
sub print_tuv_ja_e() {
    my @closing_tags = ('</seg>', '</tuv>', '</tu>');
    print join('', map { "$_\n" } @closing_tags);
}
# Print the closing </body> and </tmx> tags, each followed by a newline,
# which ends the TMX document.
sub print_tmx_footer() {
    my @closing_tags = ('</body>', '</tmx>');
    print join('', map { "$_\n" } @closing_tags);
}
# Print the fixed document preamble: the XML declaration (UTF-8), the TMX 1.1
# DOCTYPE, the opening <tmx> tag, a <header> element whose attributes name
# OmegaT 2.0.2_1 as the creation tool and EN-US as the source language, and
# the opening <body> tag.  Every line ends with a newline.
sub print_tmx_header() {
    my @header_lines = (
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE tmx SYSTEM "tmx11.dtd">',
        '<tmx version="1.1">',
        '<header',
        'creationtool="OmegaT"',
        'creationtoolversion="2.0.2_1"',
        'segtype="sentence"',
        'o-tmf="OmegaT TMX"',
        'adminlang="EN-US"',
        'srclang="EN-US"',
        'datatype="plaintext"',
        '>',
        '</header>',
        '<body>',
    );
    print join('', map { "$_\n" } @header_lines);
}
# Return a copy of the given string with the four markup-significant
# characters replaced by entity references:
#   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;     "  ->  &quot;
# The argument itself is not modified.  Each input character is replaced at
# most once, so an "&" that already starts an entity in the input becomes
# "&amp;" like any other ampersand.
sub HTMLSanitize {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $text =~ s/([&<>"])/$entity_for{$1}/g;
    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment