Skip to content

Instantly share code, notes, and snippets.

@jimregan
Last active January 17, 2017 18:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimregan/12175d7127493c3b6f2d5e234b401413 to your computer and use it in GitHub Desktop.
Save jimregan/12175d7127493c3b6f2d5e234b401413 to your computer and use it in GitHub Desktop.
mktextgrid.pl - makes a Praat textgrid file from speech recognition, word and phone levels.
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
use charnames ':full';
use Audio::Wav;
use Data::Dumper;
# Target words: only tokens in this list are kept from the word-level file.
my @rwords = qw(heed hid hayed head had pam matter ant palm
    mater aunt hod hawed hoed hudd hood who'd
    hide hoyd how'd petite beard gird bared heard
    hard horticulture lord hoard hurd gourd hired
    soured pertain horse hoarse bird lourdes bire sir);

# Vowel phones: only these symbols are kept from the phone-level file.
# The list was generated with:
# curl http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones|awk '($2 == "vowel"){print $1}' |tr '\n' ' '
my @rvowels = qw(AA AE AH AO AW AY EH ER EY IH IY OW OY UH UW);

# Exactly three arguments are required; report misuse on STDERR with a
# non-zero status so calling scripts can detect it.
if (@ARGV != 3) {
    print STDERR "Usage: mktextgrid.pl <word file> <phone file> <wav file>\n";
    exit 1;
}
my ($word_path, $phone_path, $wav_path) = @ARGV;

# Open both alignment inputs, failing loudly instead of silently producing
# an empty TextGrid when a path is wrong.
open(WORDS, '<', $word_path)
    or die "Cannot open word file '$word_path': $!\n";
open(PHONS, '<', $phone_path)
    or die "Cannot open phone file '$phone_path': $!\n";
binmode(WORDS, ":encoding(utf8)");
binmode(PHONS, ":encoding(utf8)");

# Read the audio duration before the output file is created, so that a
# failure at this step cannot leave a freshly truncated TextGrid behind.
my $wav = Audio::Wav->new;
my $wavfile = $wav->read($wav_path);
my $wavlen = $wavfile->length_seconds();

# The TextGrid is written next to the word file, as UTF-16 big-endian.
open(OUT, '>', "$word_path.TextGrid")
    or die "Cannot create output file '$word_path.TextGrid': $!\n";
binmode(OUT, ":encoding(UTF16-BE)");

# Set-style lookup tables for the two filters above.
my %words = map { $_ => 1 } @rwords;
my %vowels = map { $_ => 1 } @rvowels;

# Accumulators filled by the word pass and the phone pass.
my @entries;     # kept words:  hashrefs with word/start/end/prob
my @pentries;    # kept vowels: hashrefs with word/start/end/prob
my @stimes;      # start time of each kept word, parallel to @entries
my @etimes;      # end time of each kept word, parallel to @entries
# Word pass: each usable line has the form "<token> <start> <end> <prob>",
# with the three numbers written with a decimal point. Lines of any other
# shape, and tokens that are not in %words, are ignored.
while (defined(my $line = <WORDS>)) {
    next unless $line =~ /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/;
    my ($label, $begin, $finish, $score) = ($1, $2, $3, $4);
    next unless exists $words{$label};

    # Keep the full record, plus the start/end times in parallel arrays
    # that are later used to window the phone file.
    push @entries, {
        'word'  => $label,
        'start' => $begin,
        'end'   => $finish,
        'prob'  => $score,
    };
    push @stimes, $begin;
    push @etimes, $finish;
}
# Phone pass: walk the phone-level file in step with the kept words and
# collect every vowel phone that starts inside a kept word, i.e. with
# word start <= phone start < word end.
#
# $timeidx indexes the word (in @stimes/@etimes) currently being matched.
# Both the words and the phones are consumed in file order; this assumes the
# two files are sorted by time -- TODO confirm against the recogniser output.
my $timeidx = 0;
while (<PHONS>) {
    # Nothing left to match once every kept word has been passed.
    last if ($timeidx > $#stimes);

    # Same "<symbol> <start> <end> <prob>" layout as the word file.
    next unless /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/;
    my ($tword, $tstart, $tend, $tprob) = ($1, $2, $3, $4);

    # Skip over every word that has already ended at this phone's start.
    # The phone itself is then still tested against the new current word,
    # so the first phone of a directly adjacent word is not lost.
    $timeidx++ while ($timeidx <= $#stimes && $tstart >= $etimes[$timeidx]);
    last if ($timeidx > $#stimes);

    # Phone lies before the current word. A phone starting exactly at the
    # word start belongs to the word, hence "<" rather than "<=".
    next if ($tstart < $stimes[$timeidx]);

    next unless exists $vowels{$tword};
    push @pentries, {
        'word'  => $tword,
        'start' => $tstart,
        'end'   => $tend,
        'prob'  => $tprob,
    };
}
# Writes the interval records of one tier to OUT.
#
# Parameter: an array reference of hashrefs carrying 'start', 'end' and
# 'word'. For every item two intervals are printed: an empty-text gap from
# the end of the previous item (0 for the first) up to the item's start,
# followed by the labelled interval itself. Interval numbers start at 1.
# Returns nothing.
#
# NOTE(review): no closing gap up to the tier's xmax is written, and an item
# that starts exactly where the previous one ended yields a zero-length gap;
# confirm that Praat accepts both.
sub write_intervals {
    my ($items) = @_;
    my $cnt   = 1;
    my $ltime = 0;
    for my $item (@{$items}) {
        # Unlabelled gap before the item.
        print OUT " intervals [$cnt]:\n";
        $cnt++;
        print OUT " xmin = $ltime\n";
        print OUT " xmax = $item->{'start'}\n";
        print OUT " text = \"\"\n";
        # The labelled interval.
        print OUT " intervals [$cnt]:\n";
        $cnt++;
        print OUT " xmin = $item->{'start'}\n";
        print OUT " xmax = $item->{'end'}\n";
        print OUT " text = \"$item->{'word'}\"\n";
        $ltime = $item->{'end'};
    }
    return;
}

# Two intervals (gap + label) are emitted per kept word.
my $outentries = 2 * scalar(@entries);

# Byte-order mark first, then the file header and the header of tier 1.
print OUT "\N{BOM}";
print OUT <<__HERE__;
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0
xmax = $wavlen
tiers? <exists>
size = 4
item []:
item [1]:
class = "IntervalTier"
name = "word"
xmin = 0
xmax = $wavlen
intervals: size = $outentries
__HERE__
write_intervals(\@entries);

# Tier 2: the vowels found inside the kept words, same two-per-item layout.
$outentries = 2 * scalar(@pentries);
print OUT <<__HERE__;
item [2]:
class = "IntervalTier"
name = "vowel"
xmin = 0
xmax = $wavlen
intervals: size = $outentries
__HERE__
write_intervals(\@pentries);

# Tiers 3 and 4 are written without any intervals.
print OUT <<__HERE__;
item [3]:
class = "IntervalTier"
name = "vowel2"
xmin = 0
xmax = $wavlen
intervals: size = 0
item [4]:
class = "IntervalTier"
name = "measure"
xmin = 0
xmax = $wavlen
intervals: size = 0
__HERE__

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check the result explicitly.
close(OUT) or die "Cannot finish writing TextGrid output: $!\n";
# Builds a language model from the token corpus phon.corp and converts it
# for use with Sphinx. The tool names look like the CMU-Cambridge LM toolkit
# plus sphinxbase -- TODO confirm the installed versions.

# Token frequencies of phon.corp, reduced to a vocabulary file.
text2wfreq < phon.corp | wfreq2vocab > phon.corp.vocab
# One distinct token per line: split on spaces, sort, drop duplicates.
cat phon.corp|tr ' ' '\n'|sort|uniq > phon.closed
# Id n-gram counts restricted to the vocabulary, using /tmp/ as scratch space.
# NOTE(review): the input here is phon.closed (the de-duplicated token list),
# not phon.corp itself -- confirm that this is intended.
text2idngram -vocab phon.corp.vocab -temp /tmp/ < phon.closed > phon.idngram
# Language model from the id n-grams, written to phon.lm (presumably in ARPA
# text format, going by the -arpa flag).
idngram2lm -vocab_type 0 -idngram phon.idngram -vocab phon.corp.vocab -arpa phon.lm
# Convert phon.lm into phon.lm.bin.
sphinx_lm_convert -i phon.lm -o phon.lm.bin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment