Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/perl
# Copyright (c) 2015-2016 Trinity College, Dublin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
#!/usr/bin/perl
my @in = qw(asfalt asfaltu asfalcie);
sub longest_suffix {
my $list = shift;
my $first = shift(@{$list});
for my $second(@in) {
my @a = split(//, $first);
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
open(IN, "<", "$ARGV[0]");
binmode(IN, ":encoding(latin-1)");
#open(OUT, ">", "collisions.txt");
#binmode(OUT, ":utf8");
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
binmode(STDIN, ":utf8");
my %tweak = (
'áit éigin' => '',
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
sub unfada {
@jimregan
jimregan / aev-aev_FONIPA.xml
Last active May 2, 2017 07:29
Transliterators
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
@jimregan
jimregan / fp-phon-collisions.pl
Created February 27, 2017 23:30
Search for homophones
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
open(IN, "<", "$ARGV[0]");
binmode(IN, ":encoding(latin-1)");
#open(OUT, ">", "collisions.txt");
#binmode(OUT, ":utf8");
@jimregan
jimregan / cng-cat.pl
Last active March 9, 2017 08:25
CNG utils
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
binmode(STDIN, ":encoding(UTF-16LE)");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
muirear6far muirearófar
z-hiodrocsaídéfheinile 2-hiodrocsaídéfheinile
2 hiodrocsaídéfheinil 2-hiodrocsaídéfheinil
m6tarbháid mótarbháid
ndílucht6idh ndíluchtóidh
s6rt sórt
n-ord6idh n-ordóidh
agdéileálaí ag déileálaí
aghlacadh a ghlacadh
agusoibriúcháin agus oibriúcháin
@jimregan
jimregan / mktextgrid.pl
Last active January 17, 2017 18:50
mktextgrid.pl - makes a Praat textgrid file from speech recognition, word and phone levels.
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
use charnames ':full';
use Audio::Wav;
use Data::Dumper;
my @rwords = qw(heed hid hayed head had pam matter ant palm