Skip to content

Instantly share code, notes, and snippets.

@TinoDidriksen
Created August 18, 2022 12:18
Show Gist options
  • Save TinoDidriksen/a86bcd668d1df6cff246e6997b87843f to your computer and use it in GitHub Desktop.
Save TinoDidriksen/a86bcd668d1df6cff246e6997b87843f to your computer and use it in GitHub Desktop.
echo -e 'Nuuk\nQaanaaq\nAasiaat' | ~/langtech/kal/tools/shellscripts/kal-tokenise | cg-conv -c -F | grep -F Prop+Abs | lg-multi.pl
#!/usr/bin/env perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
use strict;
use warnings;
use utf8;
BEGIN {
   # Executed at compile time, before the rest of the script is parsed.
   # Attach the UTF-8 encoding layer to both standard streams.
   my $utf8_layer = ':encoding(UTF-8)';
   binmode(STDIN, $utf8_layer);
   binmode(STDOUT, $utf8_layer);
   # Autoflush the currently selected output handle so results appear
   # line by line instead of being held in a buffer.
   $| = 1;
}
use open qw( :encoding(UTF-8) :std );
use feature 'unicode_strings';
# Strip trailing and leading whitespace from the first argument.
# The argument is edited in place through the @_ alias, and the trimmed
# string is also returned so the call can be used inside an expression.
sub trim {
   for ($_[0]) {
      s/\s+$//sg;
      s/^\s+//sg;
   }
   return $_[0];
}
# Quote a string for safe interpolation into a POSIX shell command line:
# wrap it in single quotes and rewrite every embedded ' as '\''.
sub _sh_quote {
   my ($text) = @_;
   $text =~ s/'/'\\''/g;
   return "'$text'";
}

# Main loop. Every STDIN line is expected to look like
# "wordform<TAB>analysis". For each such line a header comment is printed,
# then the +Abs+ tag of the analysis is swapped for each variant tag, the
# result is piped through the generator, and every generated line is piped
# through the cg-conv / long2sem.pl chain before being printed. A blank
# line terminates the output for each input line.
while (my $line = <STDIN>) {
   chomp($line);
   # Take both fields straight from the match result instead of reading $1
   # after a substitution: $1 is not reset by a failed match, so a line
   # without a TAB would otherwise yield a stale or undefined word form.
   my ($wf, $reading) = ($line =~ m@^([^\t]+)\t(.+)$@);
   if (!defined $reading) {
      warn "Skipping line without a TAB-separated analysis: $line\n" if $line =~ /\S/;
      next;
   }
   print "# $wf => $reading\n";
   # List all replacements, including itself if that should be part of the output
   foreach my $v (qw(Abs Abl Trm Lok)) {
      my $new = $reading;
      # Replace the tag to vary with the variant
      $new =~ s@\+Abs\+@+$v+@;
      # Generate surface forms. The analysis is shell-quoted so an embedded
      # apostrophe cannot break out of the command, and printf is used
      # because some sh implementations make echo interpret backslashes.
      my $gen_input = _sh_quote($new);
      my $gen = `printf '%s\\n' $gen_input | hfst-optimized-lookup -u -p ~/langtech/kal/src/generator-gt-norm.hfstol` // '';
      $gen = trim($gen);
      # For each generated form, inject Katersat semantics
      foreach my $g (split(/\n/, $gen)) {
         my ($ana, $surf) = ($g =~ m@^([^\t]+)\t(.+)$@);
         # Ignore blank or otherwise unparsable generator output lines.
         next unless defined $surf;
         # DUMMY stands in for the surface form while the analysis passes
         # through the converter chain; it is swapped back in afterwards.
         my $sem_input = _sh_quote("DUMMY\t$ana");
         my $sem = `printf '%s\\n' $sem_input | cg-conv -f -C | ~/langtech/nutserut/dev/kal2eng/bin/long2sem.pl | cg-conv -c -F` // '';
         $sem = trim($sem);
         $sem =~ s@DUMMY@$surf@;
         # Reverse the order of the TAB-separated fields before printing.
         print join("\t", reverse(split(/\t/, $sem))), "\n";
      }
   }
   print "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment