jimregan/mktextgrid.pl

## mktextgrid.pl
#!/usr/bin/perl

use warnings;
use strict;
use utf8;
use charnames ':full';
use Audio::Wav;
use Data::Dumper;

# Words that are kept from the word alignment file; any other word found
# there is ignored by the reader further down.
my @rwords = qw[
    heed hid hayed head had
    pam matter ant palm mater aunt
    hod hawed hoed hudd hood who'd
    hide hoyd how'd
    petite beard gird bared heard hard
    horticulture lord hoard hurd gourd hired soured
    pertain horse hoarse bird lourdes bire sir
];

# Phone labels that are kept from the phone alignment file.
# The list was produced from the CMUdict phone table with:
# curl http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones|awk '($2 == "vowel"){print $1}' |tr '\n' ' '
my @rvowels = qw[
    AA AE AH AO AW AY
    EH ER EY
    IH IY
    OW OY
    UH UW
];

# Command line: exactly three arguments are required.  Misuse is reported
# on STDERR with a non-zero exit status so that calling scripts can tell a
# usage error from a successful run.
if (@ARGV != 3) {
    print STDERR "Usage: mktextgrid.pl <word file> <phone file> <wav file>\n";
    exit 1;
}

# Open the two alignment files for reading and the TextGrid for writing.
# The output is written next to the word file as "<word file>.TextGrid".
# Every open is checked: carrying on with an unopened handle would only
# produce warnings and an empty or partial TextGrid.
# The handle names WORDS, PHONS and OUT are used by the rest of the script.
open(WORDS, '<', $ARGV[0])
    or die "Cannot open word file '$ARGV[0]': $!\n";
open(PHONS, '<', $ARGV[1])
    or die "Cannot open phone file '$ARGV[1]': $!\n";
open(OUT, '>', "$ARGV[0].TextGrid")
    or die "Cannot create '$ARGV[0].TextGrid': $!\n";

# Inputs are decoded as UTF-8; the TextGrid is written as big-endian UTF-16.
binmode(WORDS, ":encoding(utf8)");
binmode(PHONS, ":encoding(utf8)");
binmode(OUT, ":encoding(UTF16-BE)");

# The duration of the recording is used as xmax of the TextGrid and of
# every tier.  Direct method call instead of indirect-object "new Class".
my $wav = Audio::Wav->new;

my $wavfile = $wav->read($ARGV[2]);
my $wavlen = $wavfile->length_seconds();

# Lookup tables built from the two lists above, so that membership of a
# word or phone label can be tested with exists().
my %words  = map { $_ => 1 } @rwords;
my %vowels = map { $_ => 1 } @rvowels;

# Data collected from the alignment files:
#   @entries  - one hash ref (word, start, end, prob) per accepted word
#   @pentries - the same kind of record for accepted phones (filled later)
#   @stimes   - start times of the accepted words, in file order
#   @etimes   - end times of the accepted words, in file order
my (@entries, @pentries, @stimes, @etimes);

# Read the word alignment.  Only lines of the form
# "<label> <number> <number> <number>" (single spaces, each number with a
# decimal point) are considered; the three numbers are stored as start,
# end and prob.
while (my $line = <WORDS>) {
    my ($label, $begin, $finish, $score) =
        $line =~ /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/
        or next;

    # Words outside the target list are dropped.
    next unless exists $words{$label};

    push @entries, {
        'word'  => $label,
        'start' => $begin,
        'end'   => $finish,
        'prob'  => $score,
    };
    push @stimes, $begin;
    push @etimes, $finish;
}

# Read the phone alignment and keep the vowels that lie inside one of the
# accepted words.
#
# $timeidx points at the accepted word currently being matched (an index
# into @stimes / @etimes).
# NOTE(review): this assumes both alignment files are sorted by increasing
# time -- confirm against the tool that produces them.
#
# Two details matter here:
#   * A phone that starts exactly at the start of the word belongs to the
#     word.  Comparing with "<=" dropped it, which lost the vowel of
#     vowel-initial targets such as "ant" and "aunt".
#   * When a phone starts at or after the end of the current word we move
#     forward through as many words as necessary and then still test this
#     phone against the word we arrive at, instead of discarding it.
my $timeidx = 0;

while (my $line = <PHONS>) {
    # Same line format as the word file: label, start, end, probability.
    my ($phone, $pstart, $pend, $pprob) =
        $line =~ /^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/
        or next;

    # Skip every word that has already ended when this phone starts.
    $timeidx++
        while $timeidx <= $#stimes && $pstart >= $etimes[$timeidx];

    # No accepted words left: nothing further can match.
    last if $timeidx > $#stimes;

    # The phone lies before the current word.
    next if $pstart < $stimes[$timeidx];

    # Inside the current word: keep it only if it is a vowel.
    next unless exists $vowels{$phone};

    push @pentries, {
        'word'  => $phone,
        'start' => $pstart,
        'end'   => $pend,
        'prob'  => $pprob,
    };
}

# Write the byte order mark, the TextGrid header and the "word" tier.
#
# Each accepted word produces two intervals: an unlabelled one running from
# the end of the previous word (0 for the first word) up to the word's
# start, and a labelled one covering the word itself.  The interval count
# announced in the tier header is therefore twice the number of words.
#
# $outentries, $cnt and $ltime are declared at file scope because the code
# for the vowel tier assigns to them again.
my $outentries = 2 * scalar(@entries);
print OUT "\N{BOM}", <<__HEADER__;
File type = "ooTextFile"
Object class = "TextGrid"

xmin = 0
xmax = $wavlen
tiers? <exists>
size = 4
item []:
    item [1]:
        class = "IntervalTier"
        name = "word"
        xmin = 0
        xmax = $wavlen
        intervals: size = $outentries
__HEADER__

my $cnt   = 1;    # number of the next interval to be written
my $ltime = 0;    # end time of the previously written word
for my $entry (@entries) {
    my $gap_idx  = $cnt++;
    my $word_idx = $cnt++;
    print OUT <<__INTERVALS__;
        intervals [$gap_idx]:
            xmin = $ltime
            xmax = $entry->{'start'}
            text = ""
        intervals [$word_idx]:
            xmin = $entry->{'start'}
            xmax = $entry->{'end'}
            text = "$entry->{'word'}"
__INTERVALS__
    $ltime = $entry->{'end'};
}

# Write the "vowel" tier in the same layout as the word tier: for every
# accepted phone an unlabelled interval from the end of the previous phone
# (0 for the first) to its start, followed by a labelled interval for the
# phone itself.  $outentries, $cnt and $ltime are the file-scope variables
# declared for the word tier and are simply reset here.
$outentries = 2 * scalar(@pentries);
print OUT <<__TIER__;
    item [2]:
        class = "IntervalTier"
        name = "vowel"
        xmin = 0
        xmax = $wavlen
        intervals: size = $outentries
__TIER__

$cnt   = 1;
$ltime = 0;
for my $entry (@pentries) {
    my $gap_idx   = $cnt++;
    my $phone_idx = $cnt++;
    print OUT <<__INTERVALS__;
        intervals [$gap_idx]:
            xmin = $ltime
            xmax = $entry->{'start'}
            text = ""
        intervals [$phone_idx]:
            xmin = $entry->{'start'}
            xmax = $entry->{'end'}
            text = "$entry->{'word'}"
__INTERVALS__
    $ltime = $entry->{'end'};
}

# Finish the file with the tiers "vowel2" and "measure".  Both span the
# whole recording and are written without any intervals.
print OUT <<__HERE__;
    item [3]:
        class = "IntervalTier"
        name = "vowel2"
        xmin = 0
        xmax = $wavlen
        intervals: size = 0
    item [4]:
        class = "IntervalTier"
        name = "measure"
        xmin = 0
        xmax = $wavlen
        intervals: size = 0
__HERE__

# Output is buffered, so a write error (for example a full disk) may only
# show up when the handle is closed.  Close explicitly and check the result
# rather than relying on the unchecked implicit close at program exit.
close(OUT)
    or die "Error while writing '$ARGV[0].TextGrid': $!\n";

## remakelm.sh
# Rebuild the language model files phon.lm and phon.lm.bin from phon.corp.

# Step 1: word frequencies of the corpus -> vocabulary file.
text2wfreq < phon.corp \
    | wfreq2vocab > phon.corp.vocab

# Step 2: put every space-separated token of the corpus on its own line,
# then sort and remove duplicates.
tr ' ' '\n' < phon.corp \
    | sort \
    | uniq > phon.closed

# Step 3: id n-gram file from phon.closed, using the vocabulary from step 1
# (temporary files go to /tmp/).
text2idngram -vocab phon.corp.vocab -temp /tmp/ \
    < phon.closed > phon.idngram

# Step 4: language model written through the -arpa option.
idngram2lm -vocab_type 0 \
    -idngram phon.idngram \
    -vocab phon.corp.vocab \
    -arpa phon.lm

# Step 5: convert phon.lm into phon.lm.bin.
sphinx_lm_convert -i phon.lm -o phon.lm.bin
	#!/usr/bin/perl

	use warnings;
	use strict;
	use utf8;
	use charnames ':full';
	use Audio::Wav;
	use Data::Dumper;

	my @rwords = qw(heed hid hayed head had pam matter ant palm
	mater aunt hod hawed hoed hudd hood who'd
	hide hoyd how'd petite beard gird bared heard
	hard horticulture lord hoard hurd gourd hired
	soured pertain horse hoarse bird lourdes bire sir);

	# curl http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones|awk '($2 == "vowel"){print $1}' |tr '\n' ' '
	my @rvowels = qw(AA AE AH AO AW AY EH ER EY IH IY OW OY UH UW);

	if($#ARGV != 2) {
	print "Usage: mktextgrid.pl <word file> <phone file> <wav file>\n";
	exit;
	}

	open (WORDS, '<', $ARGV[0]);
	open (PHONS, '<', $ARGV[1]);
	open (OUT, '>', "$ARGV[0].TextGrid");
	binmode(WORDS, ":encoding(utf8)");
	binmode(PHONS, ":encoding(utf8)");
	binmode(OUT, ":encoding(UTF16-BE)");

	my $wav = new Audio::Wav;

	my $wavfile = $wav->read($ARGV[2]);
	my $wavlen = $wavfile->length_seconds();

	my %words = ();
	for my $w (@rwords) {
	$words{$w} = 1;
	}

	my %vowels = ();
	for my $v (@rvowels) {
	$vowels{$v} = 1;
	}

	my @entries;
	my @pentries;
	my @stimes;
	my @etimes;

	while(<WORDS>) {
	my %word = ();
	if(/^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/) {
	my $tword = $1;
	my $tstart = $2;
	my $tend = $3;
	my $tprob = $4;
	if(exists $words{$tword}) {
	$word{'word'} = $tword;
	$word{'start'} = $tstart;
	$word{'end'} = $tend;
	$word{'prob'} = $tprob;
	push @entries, \%word;
	push @stimes, $tstart;
	push @etimes, $tend;
	}
	} else {
	next;
	}
	}

	my $timeidx = 0;
	my $laststime = $stimes[$timeidx];
	my $lastetime = $etimes[$timeidx];

	while(<PHONS>) {
	last if ($timeidx > $#stimes);
	# it's a phonetic symbol here, but copy/paste...
	my %word = ();
	if(/^([^ ]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*) ([0-9]*\.[0-9]*)/) {
	my $tword = $1;
	my $tstart = $2;
	my $tend = $3;
	my $tprob = $4;

	next if($tstart <= $laststime);
	if($tstart >= $lastetime) {
	$timeidx++;
	$laststime = $stimes[$timeidx];
	$lastetime = $etimes[$timeidx];
	next;
	}
	if(exists $vowels{$tword}) {
	$word{'word'} = $tword;
	$word{'start'} = $tstart;
	$word{'end'} = $tend;
	$word{'prob'} = $tprob;
	push @pentries, \%word;
	}
	} else {
	next;
	}
	}

	my $outentries = ($#entries + 1) * 2;
	print OUT "\N{BOM}";
	print OUT <<__HERE__;
	File type = "ooTextFile"
	Object class = "TextGrid"

	xmin = 0
	xmax = $wavlen
	tiers? <exists>
	size = 4
	item []:
	item [1]:
	class = "IntervalTier"
	name = "word"
	xmin = 0
	xmax = $wavlen
	intervals: size = $outentries
	__HERE__

	my $cnt = 1;
	my $ltime = 0;
	for my $went (@entries) {
	print OUT " intervals [$cnt]:\n";
	$cnt++;
	print OUT " xmin = $ltime\n";
	print OUT " xmax = $$went{'start'}\n";
	print OUT " text = \"\"\n";
	print OUT " intervals [$cnt]:\n";
	$cnt++;
	print OUT " xmin = $$went{'start'}\n";
	print OUT " xmax = $$went{'end'}\n";
	print OUT " text = \"$$went{'word'}\"\n";
	$ltime = $$went{'end'};
	}

	$outentries = ($#pentries + 1) * 2;
	print OUT <<__HERE__;
	item [2]:
	class = "IntervalTier"
	name = "vowel"
	xmin = 0
	xmax = $wavlen
	intervals: size = $outentries
	__HERE__


	$cnt = 1;
	$ltime = 0;
	for my $pent (@pentries) {
	print OUT " intervals [$cnt]:\n";
	$cnt++;
	print OUT " xmin = $ltime\n";
	print OUT " xmax = $$pent{'start'}\n";
	print OUT " text = \"\"\n";
	print OUT " intervals [$cnt]:\n";
	$cnt++;
	print OUT " xmin = $$pent{'start'}\n";
	print OUT " xmax = $$pent{'end'}\n";
	print OUT " text = \"$$pent{'word'}\"\n";
	$ltime = $$pent{'end'};
	}

	print OUT <<__HERE__;
	item [3]:
	class = "IntervalTier"
	name = "vowel2"
	xmin = 0
	xmax = $wavlen
	intervals: size = 0
	item [4]:
	class = "IntervalTier"
	name = "measure"
	xmin = 0
	xmax = $wavlen
	intervals: size = 0
	__HERE__
	text2wfreq < phon.corp | wfreq2vocab > phon.corp.vocab
	cat phon.corp|tr ' ' '\n'|sort|uniq > phon.closed
	text2idngram -vocab phon.corp.vocab -temp /tmp/ < phon.closed > phon.idngram
	idngram2lm -vocab_type 0 -idngram phon.idngram -vocab phon.corp.vocab -arpa phon.lm
	sphinx_lm_convert -i phon.lm -o phon.lm.bin