Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View dipanjannag's full-sized avatar
🏠
Working from home

Dipanjan dipanjannag

🏠
Working from home
View GitHub Profile
from __future__ import unicode_literals, print_function
import json
from pathlib import Path
from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN
# The sample dataset JSON is looked up in the same directory as this script.
SAMPLE_DATASET_PATH = Path(__file__).parent.joinpath("sample_dataset.json")
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
# Read a WAV file with SciPy and print the length of the returned data array.
from scipy.io import wavfile

# NOTE(review): `filename` is a placeholder and is not the path that is read
# below -- confirm which file is intended before wiring it into the call.
filename = 'your_favourite_audio.wav'
# wavfile.read returns a (sample rate, data array) pair.
rate, data = wavfile.read('angry/a01.wav')
# Function-call form of print: valid on Python 3, and with a single argument
# it prints the same text on Python 2.
print(len(data))
online2-wav-nnet3-latgen-faster \
--online=false \
--do-endpointing=false \
--frame-subsampling-factor=3 \
--config=new/conf/online.conf \
--max-active=7000 \
--beam=15.0 \
--lattice-beam=6.0 \
--acoustic-scale=1.0 \
--word-symbol-table=new/graph/words.txt \
#!/bin/sh
# Run this file from the $KALDI_HOME/egs/aspire/s5/ directory. This script also assumes
# that your words.dic and lm.arpa are present in the following directory:
# $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict
# First, create the required directories for our new model.
mkdir -p new/local/dict
mkdir -p new/local/lang
# Let's copy some data files.
#!/bin/sh
# Build an ARPA language model (lm.arpa) from corpus.txt, restricted to the
# vocabulary listed in words.txt.

# Stop at the first failing command so that ngram-count never runs against a
# missing or stale corpus_upper.txt.
set -e

# Upper-case the corpus. tr reads the file through a redirection, so no extra
# cat process is needed.
tr '[:lower:]' '[:upper:]' < corpus.txt > corpus_upper.txt

# Order-3 (trigram) model; words outside words.txt are mapped to "<unk>".
ngram-count -text corpus_upper.txt -order 3 -limit-vocab -vocab words.txt -unk -map-unk "<unk>" -kndiscount -interpolate -lm lm.arpa
#!/bin/sh
# A g2p model generates pronunciations for words once it has been trained, but training needs
# at least some word-to-pronunciation mappings. For our purpose we can use the cmudict already
# provided with Kaldi in the $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict directory.
# Run this script from that directory.

# Train the first model with the "--devel 5%" option. A single run is enough:
# repeating the identical command would only redo the training and overwrite model-1.
g2p.py --train cmudict_SPHINX_40 --devel 5% --write-model model-1

# Run the trained model against the dictionary and save the output to model-1-test.
g2p.py --model model-1 --test cmudict_SPHINX_40 > model-1-test
delta
0.6952
0.4795
0.9692
0.1335
2.0096
0.9209
0.4426
0.9409
2.2962