Skip to content

Instantly share code, notes, and snippets.

@mzaksana
Created November 18, 2019 04:10
Show Gist options
  • Save mzaksana/0a150d4ab9d372b1131cebcc65820a1e to your computer and use it in GitHub Desktop.
Save mzaksana/0a150d4ab9d372b1131cebcc65820a1e to your computer and use it in GitHub Desktop.
Data preparation command log
cd ../../Program/
ls
cd controllers/
ls
cat Program.php
ls
cd /home/mza/Documents/Pro/IndoAcro/data/core/
ls
cd ..
ls
cd base/
ls
cd ..
ls
mkdir tmp-1
cd tmp-1/
l
scp -P 222 zikri@dmir.cs.unsyiah.ac.id:~/data/indoacro/tmp/* .
cd /var/www/html/IndoAcro/
l
php index.php Program makeTable
88
144
ls
cd application/modules/CoreApi/per
cd application/modules/CoreApi/perl/l
cd application/modules/CoreApi/perl/
ls
perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180101-20180228.txt && perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180701-20180829.txt && perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180901-20181031.txt
ssh zikri@dmir.cs.unsyiah.ac.id -p 222
mysql -u pro -p
ls
history
mysql -u pro -p
cd
cd Rains/
cd SpeeachRC/
ls
cd d
cd data/
ls
cd mapping/
ls
cp text.only corpus.txt
vim corpus.txt
cut corpus.txt -d' ' -f2- > tmp
cat tmp
mv tmp corpus.txt
cat tmp
cat corpus.txt
ls
cp corpus.txt vocab.txt
vim corpus.txt
vim vocab.txt
tr text.only
tr text.only "\s" "\n"
tr "\s" "\n" vocab.txt
cat vocab.txt | tr "\s" "\n"
cat vocab.txt | tr " " "\n"
cat vocab.txt | tr " " "\n" > vocab.txt
cat vocab.txt
cp corpus.txt vocab.txt
cat vocab.txt | tr " " "\n" > vocab.txt.d
cat vocab.txt
cat vocab.txt.d
mv vocab.txt.d vocab.txt
ls
cat vocab.txt
cat vocab.txt | sort aa
cat vocab.txt | sort > aa
mv aa vocab.txt
ls
cat vocab.txt
cat vocab.txt | sort | uniq> aa
mv aa vocab.txt
cat vocab.txt
l
ls
cat corpus.txt
l
cat vocab.txt
ls
mv vocab.txt vocab-full.txt
ls
cp corpus.txt vocab-full.txt ../../../Tools/xzkaldi/egs/zcommonvoice/s5/data/dataset/
ls
cd Rains/Tools/
ls
cd xzkaldi/
l
cd egs/commonvoice/
ls
cd s5/
ls
ls exp/
ls exp/make_mfcc/valid_train/
cd ..
ls
cd s5/
cat run.sh
vim run.sh
ls
cd data/
ls
cd valid_train/
l
cd ..
ls
rm -r lang lang_test/ local/
sudo rm -r lang lang_test/ local/
ls
clear
ls
mkdir local
cd local/
l
mkdir dict
ls
cd dict/
ls
source ../../../../../../tools/env.sh
cp ../../valid_train/text .
ls
head text
vim text
cut -d' ' text -f2- > corpus.txt
cat corpus.txt
ls
vim corpus.txt
ls
ngram-count -order 3 -write-vocab vocab-full.txt -wbdiscount -text corpus.txt -lm lm.gz
ls
gunzip -k lm.gz
vim lm
ls
cd ..
ls
svn co http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict cmudict
ls
cd cmudict/
k
l
perl scripts/make_baseform.pl cmudict.0.7a /dev/stdout | sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' | tr '[A-Z]' '[a-z]' > cmudict-plain.txt
vim cmudict-plain.txt
vim cmudict.0.7a
cd ../dict/
ls
awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
awk 'NR==FNR{words[$1]; next;} !($1 in words)'
awk 'NR==FNR{words[$1]; next;} !($1 in words)' ../cmudict/cmudict-plain.txt vocab-full.txt | egrep -v '<.?s>' > vocab-oov.txt
ls
vim vocab-oov.txt
ls
wget http://www.speech.cs.cmu.edu/tools/product/1574044008_32187/7626.dict -O lexicon-oov.txt
ls
vim lexicon-oov.txt
ls
awk 'NR==FNR{words[$1]; next;} ($1 in words)' vocab-full.txt ../cmudict/cmudict-plain.txt | egrep -v '<.?s>' > lexicon-iv.txt
l
cat lexicon-oov.txt lexicon-iv.txt | sort > lexicon.txt
echo SIL > silence_phones.txt
echo SIL > optional_silence.txt
grep -v -w sil lexicon.txt | awk '{for(n=2;n<=NF;n++) { p[$n]=1; }} END{for(x in p) {print x}}' | sort > nonsilence_phones.txt
l
echo -e "<unk>\tSIL" >> lexicon.txt
touch extra_questions.txt
vim lexicon.txt
vim nonsilence_phones.txt
cd ..
ls
cd ../..
utils/prepare_lang.sh data/local/dict '<unk>' data/local/lang data/lang
ls
cd data/lang/
l
vim phones.txt
ls
cd ../..
l
cd conf/
ls
vim mfcc.conf
vim mfcc_hires.conf
ls
vim mfcc_hires.conf
ls
cd ..
l
ls
steps/make_mfcc.sh
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 4 --cmd "run.pl" data/valid_test data/valid_test/log data/valid_test/feats
cd data/valid_test/
ls
cd log/
l
vim make_mfcc_valid_test.1.log
history
history | tail 2034
history | tail -2034
history | tail -200 > ~/Documents/Data/log
2----------------
cut corpus.txt -d' ' -f2- > tmp
cat tmp
mv tmp corpus.txt
cat tmp
cat corpus.txt
ls
cp corpus.txt vocab.txt
vim corpus.txt
vim vocab.txt
tr text.only
tr text.only "\s" "\n"
tr "\s" "\n" vocab.txt
cat vocab.txt | tr "\s" "\n"
cat vocab.txt | tr " " "\n"
cat vocab.txt | tr " " "\n" > vocab.txt
cat vocab.txt
cp corpus.txt vocab.txt
cat vocab.txt | tr " " "\n" > vocab.txt.d
cat vocab.txt
cat vocab.txt.d
mv vocab.txt.d vocab.txt
ls
cat vocab.txt
cat vocab.txt | sort aa
cat vocab.txt | sort > aa
mv aa vocab.txt
ls
cat vocab.txt
cat vocab.txt | sort | uniq> aa
mv aa vocab.txt
cat vocab.txt
l
ls
cat corpus.txt
l
cat vocab.txt
ls
mv vocab.txt vocab-full.txt
ls
cp corpus.txt vocab-full.txt ../../../Tools/xzkaldi/egs/zcommonvoice/s5/data/dataset/
ls
vim local/download_and_untar.sh
vim local/prepare_lm.sh
vim local/prepare_dict.sh
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment