Last active
January 17, 2017 18:50
-
-
Save jimregan/12175d7127493c3b6f2d5e234b401413 to your computer and use it in GitHub Desktop.
mktextgrid.pl - makes a Praat textgrid file from speech recognition, word and phone levels.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use warnings; | |
use strict; | |
use utf8; | |
use charnames ':full'; | |
use Audio::Wav; | |
use Data::Dumper; | |
# Target words: only word-file entries whose token is in this list are kept.
my @rwords = qw(heed hid hayed head had pam matter ant palm
                mater aunt hod hawed hoed hudd hood who'd
                hide hoyd how'd petite beard gird bared heard
                hard horticulture lord hoard hurd gourd hired
                soured pertain horse hoarse bird lourdes bire sir);

# Phone symbols treated as vowels. The list was produced with:
# curl http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones|awk '($2 == "vowel"){print $1}' |tr '\n' ' '
my @rvowels = qw(AA AE AH AO AW AY EH ER EY IH IY OW OY UH UW);

# Exactly three arguments are required. A usage error is reported on STDERR
# with a non-zero status so that calling scripts can detect it.
if (@ARGV != 3) {
    print STDERR "Usage: mktextgrid.pl <word file> <phone file> <wav file>\n";
    exit 1;
}

# Fail loudly if any file cannot be opened; otherwise the read loops would
# silently see no input and an empty TextGrid would be written.
open(WORDS, '<', $ARGV[0])
    or die "Cannot open word file '$ARGV[0]': $!\n";
open(PHONS, '<', $ARGV[1])
    or die "Cannot open phone file '$ARGV[1]': $!\n";
open(OUT, '>', "$ARGV[0].TextGrid")
    or die "Cannot create '$ARGV[0].TextGrid': $!\n";

binmode(WORDS, ":encoding(utf8)");
binmode(PHONS, ":encoding(utf8)");
# The TextGrid is written as big-endian UTF-16 (a BOM is emitted later).
binmode(OUT, ":encoding(UTF16-BE)");

# Duration of the audio in seconds; used as xmax for every tier.
my $wav     = Audio::Wav->new;
my $wavfile = $wav->read($ARGV[2]);
my $wavlen  = $wavfile->length_seconds();

# Lookup sets for constant-time membership tests.
my %words  = map { $_ => 1 } @rwords;
my %vowels = map { $_ => 1 } @rvowels;

my @entries;     # kept word records (hashrefs: word/start/end/prob)
my @pentries;    # kept vowel records (same shape)
my @stimes;      # start time of each kept word, parallel to @entries
my @etimes;      # end time of each kept word, parallel to @entries
# Scan the word file. Each usable line has the form
# "<token> <start> <end> <prob>" with decimal numbers; other lines are ignored.
# Only tokens present in %words are recorded.
WORD_LINE: while (my $line = <WORDS>) {
    my ($tword, $tstart, $tend, $tprob) =
        $line =~ /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/
        or next WORD_LINE;

    next WORD_LINE unless exists $words{$tword};

    push @entries, {
        'word'  => $tword,
        'start' => $tstart,
        'end'   => $tend,
        'prob'  => $tprob,
    };
    # Keep the time window of every accepted word for the phone pass.
    push @stimes, $tstart;
    push @etimes, $tend;
}
# Scan the phone file and keep the vowels that fall inside one of the kept
# words. Both the word windows (@stimes/@etimes) and the phone lines are
# assumed to be in ascending time order -- TODO confirm for the recogniser
# output being used.
my $timeidx   = 0;
my $laststime = $stimes[$timeidx];    # start of the current word window
my $lastetime = $etimes[$timeidx];    # end of the current word window

PHONE_LINE: while (my $line = <PHONS>) {
    # Stop once every word window has been consumed (or there were none).
    last PHONE_LINE if $timeidx > $#stimes;

    my ($tphone, $tstart, $tend, $tprob) =
        $line =~ /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/
        or next PHONE_LINE;

    # Move forward to the first word window that ends after this phone
    # starts. The phone is then tested against that window instead of being
    # thrown away, and any number of windows can be skipped in one step.
    while ($tstart >= $lastetime) {
        $timeidx++;
        last PHONE_LINE if $timeidx > $#stimes;
        $laststime = $stimes[$timeidx];
        $lastetime = $etimes[$timeidx];
    }

    # The phone lies before the current word. A phone that starts exactly at
    # the word onset belongs to the word, hence '<' rather than '<='.
    next PHONE_LINE if $tstart < $laststime;

    next PHONE_LINE unless exists $vowels{$tphone};

    push @pentries, {
        'word'  => $tphone,
        'start' => $tstart,
        'end'   => $tend,
        'prob'  => $tprob,
    };
}
# Turn a list of labelled records into a gap-free interval list.
#   $entries - arrayref of hashrefs with 'start', 'end' and 'word' keys,
#              in ascending time order
#   $xmax    - end time of the tier
# Returns a list of [xmin, xmax, text] triples. Unlabelled stretches become
# intervals with empty text, including the stretch from the last label to
# $xmax; gaps of zero length are not emitted.
sub _fill_intervals {
    my ($entries, $xmax) = @_;
    my @intervals;
    my $ltime = 0;
    for my $ent (@{$entries}) {
        if ($ent->{'start'} > $ltime) {
            push @intervals, [ $ltime, $ent->{'start'}, '' ];
        }
        push @intervals, [ $ent->{'start'}, $ent->{'end'}, $ent->{'word'} ];
        $ltime = $ent->{'end'};
    }
    if ($ltime < $xmax) {
        push @intervals, [ $ltime, $xmax, '' ];
    }
    return @intervals;
}

# Write the given [xmin, xmax, text] triples to OUT as numbered TextGrid
# intervals, starting at index 1.
sub _print_intervals {
    my (@intervals) = @_;
    my $cnt = 1;
    for my $interval (@intervals) {
        my ($xmin, $xmax, $text) = @{$interval};
        # A double quote inside a TextGrid string is written doubled.
        (my $quoted = $text) =~ s/"/""/g;
        print OUT " intervals [$cnt]:\n";
        print OUT " xmin = $xmin\n";
        print OUT " xmax = $xmax\n";
        print OUT " text = \"$quoted\"\n";
        $cnt++;
    }
    return;
}

# Tier 1: the kept words. The declared size is the number of intervals
# actually written, not a fixed two per word.
my @wintervals = _fill_intervals(\@entries, $wavlen);
my $outentries = scalar @wintervals;
print OUT "\N{BOM}";
print OUT <<__HERE__;
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0
xmax = $wavlen
tiers? <exists>
size = 4
item []:
item [1]:
class = "IntervalTier"
name = "word"
xmin = 0
xmax = $wavlen
intervals: size = $outentries
__HERE__
_print_intervals(@wintervals);

# Tier 2: the vowels found inside those words.
my @pintervals = _fill_intervals(\@pentries, $wavlen);
$outentries = scalar @pintervals;
print OUT <<__HERE__;
item [2]:
class = "IntervalTier"
name = "vowel"
xmin = 0
xmax = $wavlen
intervals: size = $outentries
__HERE__
_print_intervals(@pintervals);

# Tiers 3 and 4 are written with no intervals.
print OUT <<__HERE__;
item [3]:
class = "IntervalTier"
name = "vowel2"
xmin = 0
xmax = $wavlen
intervals: size = 0
item [4]:
class = "IntervalTier"
name = "measure"
xmin = 0
xmax = $wavlen
intervals: size = 0
__HERE__

# Buffered write errors only surface at close, so check it.
close(OUT) or die "Cannot finish writing '$ARGV[0].TextGrid': $!\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a language model from phon.corp and convert it for Sphinx.

# Vocabulary file derived from the corpus word frequencies.
text2wfreq < phon.corp | wfreq2vocab > phon.corp.vocab

# One token per line, sorted, duplicates removed.
tr ' ' '\n' < phon.corp | sort -u > phon.closed

# Id n-grams, then an ARPA-format model (phon.lm).
text2idngram -vocab phon.corp.vocab -temp /tmp/ < phon.closed > phon.idngram
idngram2lm -vocab_type 0 -idngram phon.idngram -vocab phon.corp.vocab -arpa phon.lm

# Convert the ARPA model to phon.lm.bin.
sphinx_lm_convert -i phon.lm -o phon.lm.bin
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment