Skip to content

Instantly share code, notes, and snippets.

@jimregan
Last active February 8, 2023 10:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimregan/c4a80da0203b04257397c56932d4c838 to your computer and use it in GitHub Desktop.
Save jimregan/c4a80da0203b04257397c56932d4c838 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Turn tab-separated alignment lines into `alignable { ... }` entries.
#
# Reads the files named on the command line (or STDIN when none are given).
# Each useful line holds two tab-separated fields, and both fields are split
# on single spaces into the same number of parts:
#
#     <input parts> TAB <output parts>
#
# One `alignable { input: "..." output: "..." }` line is printed to STDOUT
# per part pair.  Empty lines and lines starting with '#' are ignored, as
# are lines whose second field is empty.  Lines with fewer than two fields,
# or with differing part counts, are reported on STDERR and skipped.
use warnings;
use strict;
use utf8;
# :std puts the :utf8 layer on STDIN/STDOUT/STDERR.  The default layer also
# applies to the files that <> opens from @ARGV, so lc() operates on
# characters rather than raw bytes and output is not encoded twice.
use open qw(:std :utf8);

# Output parts that are printed unchanged: their '_' is not turned into a
# space (presumably it belongs to the symbol itself -- TODO confirm).
my %skip = map { $_ => 1 } qw(lj_d ll_d m_d nj_d nn_d r_d rj_d);

# Output parts with a fixed spelling, because only one of their two
# underscores is a separator (the "r_d" inside them stays intact).
my %respell = (
    'h_r_d' => 'h r_d',
    'r_d_h' => 'r_d h',
);

# Convert one output part into the text placed in the `output:` field.
#   '<EPS>'            -> '' (empty output)
#   a key of %skip     -> unchanged
#   a key of %respell  -> its fixed spelling
#   anything else      -> every '_' replaced by a space
sub format_output {
    my ($part) = @_;

    return ''               if $part eq '<EPS>';
    return $part            if exists $skip{$part};
    return $respell{$part}  if exists $respell{$part};

    (my $spaced = $part) =~ tr/_/ /;
    return $spaced;
}

# Build the `alignable` entries for one input line (already stripped of its
# line ending).  Returns the entries as a list of strings without trailing
# newlines; returns an empty list when the line is skipped.  Problems are
# reported on STDERR with the offending line.
sub alignables_for_line {
    my ($line) = @_;

    my @fields = split /\t/, $line;
    if (@fields < 2) {
        print STDERR "Error: $line\n";
        # Skip explicitly: without this the missing second field would be
        # compared below and trigger an "uninitialized value" warning.
        return;
    }

    my ($input_field, $output_field) = @fields;
    return if $output_field eq '';

    my @inputs  = split / /, lc($input_field);
    my @outputs = split / /, $output_field;
    if (@inputs != @outputs) {
        print STDERR "Part mismatch: $line\n";
        return;
    }

    my @entries;
    for my $i (0 .. $#inputs) {
        my $output = format_output($outputs[$i]);
        push @entries, "alignable { input: \"$inputs[$i]\" output: \"$output\" }";
    }
    return @entries;
}

# Read every input line, drop line endings, skip blanks and '#' comments,
# and print the entries generated for the remaining lines.
sub main {
    while (my $line = <>) {
        chomp $line;
        $line =~ s/\r//g;
        next if $line eq '' or $line =~ /^#/;

        print "$_\n" for alignables_for_line($line);
    }
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without consuming any input.
main() unless caller;
alignable { input: "-" output: "" }
alignable { input: "a" output: "A:" }
alignable { input: "a" output: "E j" }
alignable { input: "a" output: "a" }
alignable { input: "aa" output: "a" }
alignable { input: "aa" output: "A:" } # Aarne
alignable { input: "aa" output: "o:" } # Aagaard
alignable { input: "ah" output: "A:" }
alignable { input: "au" output: "a*U" }
alignable { input: "b" output: "b e:" }
alignable { input: "b" output: "b" }
alignable { input: "c" output: "k" }
alignable { input: "c" output: "s e:" }
alignable { input: "ch" output: "S" }
alignable { input: "ch" output: "k" }
alignable { input: "d" output: "d e:" }
alignable { input: "d" output: "d" }
alignable { input: "dd" output: "d" }
alignable { input: "dt" output: "t" }
alignable { input: "e" output: "" }
alignable { input: "e" output: "E" }
alignable { input: "e" output: "E:" }
alignable { input: "e" output: "e" }
alignable { input: "e" output: "e:" }
alignable { input: "ea" output: "i:" }
alignable { input: "ee" output: "e:" }
alignable { input: "eh" output: "e" }
alignable { input: "ei" output: "E" }
alignable { input: "ei" output: "i:" }
alignable { input: "eu" output: "E*U" }
alignable { input: "eu" output: "j }:" }
alignable { input: "f" output: "E f" }
alignable { input: "f" output: "e f" }
alignable { input: "f" output: "f" }
alignable { input: "fv" output: "v" }
alignable { input: "g" output: "S" }
alignable { input: "g" output: "g" }
alignable { input: "g" output: "j" }
alignable { input: "gh" output: "g" }
alignable { input: "gh" output: "j" }
alignable { input: "h" output: "h o:" }
alignable { input: "h" output: "h" }
alignable { input: "hj" output: "j" }
alignable { input: "i" output: "I" }
alignable { input: "i" output: "i:" }
alignable { input: "i" output: "j" }
alignable { input: "ih" output: "i:" }
alignable { input: "ii" output: "i:" }
alignable { input: "j" output: "j" }
alignable { input: "k" output: "k o:" }
alignable { input: "k" output: "k" }
alignable { input: "k" output: "s'" }
alignable { input: "kk" output: "k" }
alignable { input: "l" output: "e l" }
alignable { input: "l" output: "l" }
alignable { input: "lj" output: "j" }
alignable { input: "ll" output: "l" }
alignable { input: "m" output: "E m" }
alignable { input: "m" output: "m" }
alignable { input: "n" output: "e n" }
alignable { input: "n" output: "n" }
alignable { input: "ng" output: "N g" }
alignable { input: "ng" output: "N" }
alignable { input: "nn" output: "n" }
alignable { input: "o" output: "9" }
alignable { input: "o" output: "O" }
alignable { input: "o" output: "U" }
alignable { input: "o" output: "e" }
alignable { input: "o" output: "o:" }
alignable { input: "o" output: "u:" }
alignable { input: "p" output: "p e:" }
alignable { input: "p" output: "p" }
alignable { input: "ph" output: "p" }
alignable { input: "pp" output: "p" }
alignable { input: "r" output: "r" }
alignable { input: "rd" output: "d`" }
alignable { input: "rdt" output: "t`" }
alignable { input: "rl" output: "l`" }
alignable { input: "rn" output: "n`" }
alignable { input: "rnd" output: "n` d`" }
alignable { input: "rnl" output: "n` l`" }
alignable { input: "rns" output: "n` s`" }
alignable { input: "rndt" output: "n` t`" }
alignable { input: "rnt" output: "n` t`" }
alignable { input: "rnts" output: "n` t` s`" }
alignable { input: "rntss" output: "n` t` s`" }
alignable { input: "rs" output: "s`" }
alignable { input: "rsd" output: "s` d`" }
alignable { input: "rsl" output: "s` l`" }
alignable { input: "rsn" output: "s` n`" }
alignable { input: "rss" output: "s`" }
alignable { input: "rst" output: "s` t`" }
alignable { input: "rt" output: "t`" }
alignable { input: "rts" output: "t` s`" }
alignable { input: "s" output: "s" }
alignable { input: "s" output: "s`" }
alignable { input: "si" output: "s`" }
alignable { input: "ss" output: "s" }
alignable { input: "ss" output: "s`" }
alignable { input: "sz" output: "s" }
alignable { input: "t" output: "t e:" }
alignable { input: "t" output: "t" }
alignable { input: "th" output: "t" }
alignable { input: "ti" output: "S" }
alignable { input: "tj" output: "s'" }
alignable { input: "tt" output: "t" }
alignable { input: "u" output: "U" }
alignable { input: "u" output: "u0" }
alignable { input: "u" output: "}:" }
alignable { input: "v" output: "v e:" }
alignable { input: "v" output: "v" }
alignable { input: "w" output: "v" }
alignable { input: "x" output: "k s" }
alignable { input: "y" output: "Y" }
alignable { input: "y" output: "j" }
alignable { input: "z" output: "s" }
alignable { input: "z" output: "s`" }
alignable { input: "ä" output: "E" }
alignable { input: "å" output: "o:" }
alignable { input: "è" output: "E:" }
alignable { input: "é" output: "e:" }
alignable { input: "ö" output: "2:" }
alignable { input: "ö" output: "9" }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment