This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# Wrap plain text read from STDIN (one segment per line) in <srcset> or
# <refset> markup and print the result to STDOUT.
#
# Usage:
#   perl <this-script> src SRC_LANG          < plain.txt > src.xml
#   perl <this-script> ref SRC_LANG TRG_LANG < plain.txt > ref.xml
#
# With any other (or no) first argument only the <seg> lines are printed,
# without the surrounding <srcset>/<refset> and <doc> elements.

use strict;
use warnings;

# Fixed identifiers written into the set/doc headers.
my $SET_ID = 'myset';
my $DOC_ID = 'mydoc';
my $SYS_ID = 'mysys';

# Escape the characters that would make a segment ill-formed XML.
#
# '<' and '>' are always replaced.  A '&' is replaced only when it does not
# already start a named or numeric character reference, so input that was
# escaped upstream (e.g. "&amp;", "&#38;") is not escaped a second time.
#
# Takes the text to escape and returns the escaped copy.
sub escape_xml {
    my ($text) = @_;

    $text =~ s/&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}

# Build one output line for a segment.
#
# Leading and trailing whitespace (including the line terminator) is removed
# from $line before escaping.  An empty line still yields an empty <seg>, so
# segment ids stay aligned with input line numbers.
#
# Takes the segment id and the raw input line; returns the "<seg ...>" line
# including its trailing newline.
sub format_segment {
    my ($seg_id, $line) = @_;

    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    return "<seg id=\"$seg_id\">" . escape_xml($line) . "</seg>\n";
}

# Return the opening markup for the given set type ('src' or 'ref'), or the
# empty string for any other type.  Only the 'ref' variant carries a sysid
# attribute on <doc>.
sub header_for {
    my ($type, $src_lang, $trg_lang) = @_;

    if ($type eq 'src') {
        return "<srcset setid=\"$SET_ID\" srclang=\"$src_lang\">\n"
             . "<doc docid=\"$DOC_ID\">\n";
    }
    if ($type eq 'ref') {
        return "<refset setid=\"$SET_ID\" srclang=\"$src_lang\" trglang=\"$trg_lang\">\n"
             . "<doc docid=\"$DOC_ID\" sysid=\"$SYS_ID\">\n";
    }
    return '';
}

# Return the closing markup matching header_for() for the given set type.
sub footer_for {
    my ($type) = @_;

    return "</doc>\n</srcset>\n" if $type eq 'src';
    return "</doc>\n</refset>\n" if $type eq 'ref';
    return '';
}

# Script entry point: consume the command-line arguments, then stream STDIN
# to STDOUT one segment at a time.
sub main {
    my $type = shift @ARGV;
    $type = '' unless defined $type;

    # Missing language arguments fall back to the empty string, which is
    # what interpolating an unset variable produced before.
    my $src_lang = '';
    my $trg_lang = '';
    if ($type eq 'src' or $type eq 'ref') {
        $src_lang = shift @ARGV;
        $src_lang = '' unless defined $src_lang;
    }
    if ($type eq 'ref') {
        $trg_lang = shift @ARGV;
        $trg_lang = '' unless defined $trg_lang;
    }

    print header_for($type, $src_lang, $trg_lang);

    my $seg_id = 1;
    while (my $line = <STDIN>) {
        print format_segment($seg_id, $line);
        $seg_id++;
    }

    print footer_for($type);
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised without reading STDIN.
main() unless caller;

1;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment