AdolfVonKleist/little-test.sh

## little-test.sh
#!/bin/bash

# Require exactly two positional arguments: <lexicon> and <order>.
# The usage text is a diagnostic, so it goes to stderr, and the script exits
# with a non-zero status; a bare `exit` would return the status of the last
# echo (0) and make the misuse look like success to a calling script.
if [ "$#" -ne 2 ]; then
    echo "USAGE: $0 <lexicon> <order>" >&2
    echo "  Recommended: order=6~9 for English" >&2
    exit 1
fi

#Triage the lexicon a bit to overcome my poor design
cat ${1} \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\s+/);
                 $w = shift(@_);
                 $p = join(" ",@_);
                 $p =~ s/_/,/g;
                 print $w."\t".$p."\n";
               }' \
    > ${1}.triaged.lex

#Run the aligner with some reasonable params (for English)
# Input is the triaged lexicon; the aligned corpus is written next to it.
# The file arguments are quoted so that a lexicon path containing spaces or
# glob characters reaches the aligner as a single argument.
phonetisaurus-align \
    --input="${1}.triaged.lex" \
    --ofile="${1}.triaged.corpus" \
    --seq1_del=false
echo ""

#Train an n-gram model
# Note: You can use *any* tool that outputs a valid ARPA-format LM
# Recommended: Interpolate a Kneser-Ney model with a MaxEnt model (latest SRILM),
#   rescore with an RNNLM (if you want to get fancy)
# If possible, results can be further improved by pruning pronunciations with
# a further forced alignment step with your recognizer
#
# $2 (the <order> argument) is passed as the n-gram order; the model is
# trained on the aligned corpus and written as an ARPA file named after the
# lexicon and the order.  All expansions are quoted so that paths with spaces
# or glob characters are not split into several arguments.
estimate-ngram \
    -o "${2}" \
    -t "${1}.triaged.corpus" \
    -s FixModKN \
    -wl "${1}.triaged.${2}g.arpa"
echo ""

#Convert the model
# Turns the ARPA language model into the .fst file that the g2p decoder
# loads.  Paths are quoted so that a lexicon path with spaces or glob
# characters is passed through as one argument.
phonetisaurus-arpa2wfst-omega \
    --lm="${1}.triaged.${2}g.arpa" \
    --ofile="${1}.triaged.${2}g.fst"
echo ""

#Test an input word, and fix the phoneme we triaged
phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=A-bomb \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\t/);
                 $_[2] =~ s/,/_/g;
                 print join("\t",@_)."\n";
              }'
echo ""

#Test a list of input words, get the 5-best, and fixe the phoneme we triaged
for w in A-frame A-line; do echo $w; done > short.wlist

phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=short.wlist --isfile=true \
    --nbest=5 --decoder_type=fst_phi \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\t/);
                 $_[2] =~ s/,/_/g;
                 print join("\t",@_)."\n";
              }'

echo ""
	#!/bin/bash

	# Require exactly two positional arguments: <lexicon> and <order>.
	# The usage text is a diagnostic, so it goes to stderr, and the script
	# exits with a non-zero status; a bare `exit` would return the status of
	# the last echo (0) and make the misuse look like success to a caller.
	if [ "$#" -ne 2 ]; then
		echo "USAGE: $0 <lexicon> <order>" >&2
		echo " Recommended: order=6~9 for English" >&2
		exit 1
	fi

	#Triage the lexicon a bit to overcome my poor design
	cat ${1} \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\s+/);
	$w = shift(@_);
	$p = join(" ",@_);
	$p =~ s/_/,/g;
	print $w."\t".$p."\n";
	}' \
	> ${1}.triaged.lex

	#Run the aligner with some reasonable params (for English)
	# Input is the triaged lexicon; the aligned corpus is written next to it.
	# The file arguments are quoted so that a lexicon path containing spaces or
	# glob characters reaches the aligner as a single argument.
	phonetisaurus-align \
		--input="${1}.triaged.lex" \
		--ofile="${1}.triaged.corpus" \
		--seq1_del=false
	echo ""

	#Train an n-gram model
	# Note: You can use any tool that outputs a valid ARPA-format LM
	# Recommended: Interpolate a Kneser-Ney model with a MaxEnt model (latest SRILM),
	# rescore with an RNNLM (if you want to get fancy)
	# If possible, results can be further improved by pruning pronunciations with
	# a further forced alignment step with your recognizer
	#
	# $2 (the <order> argument) is passed as the n-gram order; the model is
	# trained on the aligned corpus and written as an ARPA file named after the
	# lexicon and the order.  All expansions are quoted so that paths with
	# spaces or glob characters are not split into several arguments.
	estimate-ngram \
		-o "${2}" \
		-t "${1}.triaged.corpus" \
		-s FixModKN \
		-wl "${1}.triaged.${2}g.arpa"
	echo ""

	#Convert the model
	# Turns the ARPA language model into the .fst file that the g2p decoder
	# loads.  Paths are quoted so that a lexicon path with spaces or glob
	# characters is passed through as one argument.
	phonetisaurus-arpa2wfst-omega \
		--lm="${1}.triaged.${2}g.arpa" \
		--ofile="${1}.triaged.${2}g.fst"
	echo ""

	#Test an input word, and fix the phoneme we triaged
	phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=A-bomb \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\t/);
	$_[2] =~ s/,/_/g;
	print join("\t",@_)."\n";
	}'
	echo ""

	#Test a list of input words, get the 5-best, and fixe the phoneme we triaged
	for w in A-frame A-line; do echo $w; done > short.wlist

	phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=short.wlist --isfile=true \
	--nbest=5 --decoder_type=fst_phi \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\t/);
	$_[2] =~ s/,/_/g;
	print join("\t",@_)."\n";
	}'

	echo ""