Created
April 20, 2018 13:52
-
-
Save dipanjannag/411624a88795f852c3a9a4c10df16e26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Build a decoding graph (HCLG) for the ASpIRE chain model using a custom
# lexicon and language model, then generate the online-decoding configuration.
#
# Usage: run this file from the $KALDI_HOME/egs/aspire/s5/ directory.
#
# Inputs expected to exist (relative to that directory):
#   data/local/dict/cmudict/sphinxdict/words.dic   custom pronunciation lexicon
#   data/local/dict/cmudict/sphinxdict/lm.arpa     custom ARPA language model
#   data/local/dict/{extra_questions,nonsilence_phones,optional_silence,silence_phones}.txt
#   exp/tdnn_7b_chain_online/                      model directory (with phones.txt)
#
# Everything produced by this script is written under ./new/.

# Stop at the first failing step so later stages never run on incomplete
# inputs. 'set -u' is intentionally not enabled: the sourced path.sh is not
# under this script's control and may expand variables that are unset.
set -e

# The relative paths below only resolve from the recipe directory.
if [ ! -f ./cmd.sh ] || [ ! -f ./path.sh ]; then
  echo "$0: cmd.sh / path.sh not found; run this script from \$KALDI_HOME/egs/aspire/s5/" >&2
  exit 1
fi

# First create the required directories for our new model.
mkdir -p new/local/dict
mkdir -p new/local/lang

# Copy the phone-set description files, then install the custom lexicon and
# language model under the names used by the steps below.
cp data/local/dict/extra_questions.txt new/local/dict/
cp data/local/dict/nonsilence_phones.txt new/local/dict/
cp data/local/dict/optional_silence.txt new/local/dict/
cp data/local/dict/silence_phones.txt new/local/dict/
cp data/local/dict/cmudict/sphinxdict/words.dic new/local/dict/lexicon.txt
cp data/local/dict/cmudict/sphinxdict/lm.arpa new/local/lang/lm.arpa

# We have all our data; set up the environment.
# The explicit "./" matters: under POSIX sh, '.' looks up a slash-less name
# via $PATH, and shells such as dash do not fall back to the current directory.
. ./cmd.sh
. ./path.sh

# Set the paths of our input and output locations into variables.
model=exp/tdnn_7b_chain_online
phones_src=$model/phones.txt
dict_src=new/local/dict
lm_src=new/local/lang/lm.arpa
lang=new/lang
dict=new/dict
dict_tmp=new/dict_tmp
graph=new/graph

# Compile the word lexicon (L.fst), reusing the model's phone symbol table.
utils/prepare_lang.sh --phone-symbol-table "$phones_src" \
  "$dict_src" "<unk>" "$dict_tmp" "$dict"

# Compile the grammar/language model (G.fst); format_lm.sh is given the
# gzip-compressed copy of the ARPA file.
gzip < "$lm_src" > "$lm_src.gz"
utils/format_lm.sh "$dict" "$lm_src.gz" "$dict_src/lexicon.txt" "$lang"

# Finally assemble the HCLG graph.
utils/mkgraph.sh --self-loop-scale 1.0 "$lang" "$model" "$graph"

# To use our newly created model, we must also build a decoding configuration;
# the following command creates it in the new/conf directory.
steps/online/nnet3/prepare_online_decoding.sh \
  --mfcc-config conf/mfcc_hires.conf \
  "$dict" exp/nnet3/extractor exp/chain/tdnn_7b new
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment