Dipanjan dipanjannag

## sample
from __future__ import unicode_literals, print_function

import json
from pathlib import Path

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN

# Path to sample_dataset.json, looked up in the directory that holds this file.
SAMPLE_DATASET_PATH = Path(__file__).parent.joinpath("sample_dataset.json")

## a.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                dipanjannag
                / a.ipynb
            
            
              Created
              April 23, 2018 14:20
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## a.ipnb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",

## audio_check.py
from scipy.io import wavfile

# Path of the WAV file to inspect; replace it with your own recording.
# (Previously this variable was assigned but the read call ignored it.)
filename = 'angry/a01.wav'

# wavfile.read returns the sample rate together with the sample data.
rate, data = wavfile.read(filename)

# Report the length of the sample data. The call form of print works on both
# Python 2 and Python 3, unlike the bare `print x` statement.
print(len(data))

## run_new_model.sh
online2-wav-nnet3-latgen-faster \
  --online=false \
  --do-endpointing=false \
  --frame-subsampling-factor=3 \
  --config=new/conf/online.conf \
  --max-active=7000 \
  --beam=15.0 \
  --lattice-beam=6.0 \
  --acoustic-scale=1.0 \
  --word-symbol-table=new/graph/words.txt \

## build.sh
#!/bin/sh
# Run this script from the $KALDI_HOME/egs/aspire/s5/ directory. It also assumes
# that words.dic and lm.arpa are present in the following directory:
# $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict

# First create the directories required by the new model.
for new_dir in new/local/dict new/local/lang; do
  mkdir -p "$new_dir"
done

# Let's copy some data files.

## create_lm.sh
#!/bin/sh
# Build the language model lm.arpa from corpus.txt with ngram-count.

# Upper-case the corpus. Reading corpus.txt through a redirection (instead of
# piping it from cat) makes this step fail when the corpus is missing or
# unreadable, so we stop here rather than train on an empty corpus_upper.txt.
tr '[:lower:]' '[:upper:]' < corpus.txt > corpus_upper.txt || exit 1

# Order-3 model limited to the vocabulary in words.txt, with "<unk>" as the
# unknown-word token; the result is written to lm.arpa.
ngram-count -text corpus_upper.txt -order 3 -limit-vocab -vocab words.txt -unk -map-unk "<unk>" -kndiscount -interpolate -lm lm.arpa

## g2p_model.sh
#!/bin/sh
# A g2p model generates the pronunciation of words once it has been trained, but
# training needs at least some word-to-pronunciation mappings. For our purpose we
# can use the cmudict already provided in Kaldi in the
# $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict directory.
# Run this script from that directory.

# Train on cmudict_SPHINX_40 (with --devel 5%) and write the result to model-1.
# The command is run once only: repeating it with identical arguments just
# retrains and overwrites the same model-1 file.
g2p.py --train cmudict_SPHINX_40 --devel 5% --write-model model-1

# Test model-1 against the same dictionary and save the output to model-1-test.
g2p.py --model model-1 --test cmudict_SPHINX_40 > model-1-test

## links
http://www.math.utah.edu/~wisnia/glossary.html#s

## spread.csv

          
            delta

            
              0.6952

            
              0.4795

            
              0.9692

            
              0.1335

            
              2.0096

            
              0.9209

            
              0.4426

            
              0.9409

            
              2.2962
	from __future__ import unicode_literals, print_function

	import json
	from pathlib import Path

	from snips_nlu import SnipsNLUEngine, load_resources
	from snips_nlu.default_configs import CONFIG_EN

	# Path to sample_dataset.json, looked up in the directory that holds this file.
	SAMPLE_DATASET_PATH = Path(__file__).parent.joinpath("sample_dataset.json")
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	from scipy.io import wavfile

	# Path of the WAV file to inspect; replace it with your own recording.
	# (Previously this variable was assigned but the read call ignored it.)
	filename = 'angry/a01.wav'

	# wavfile.read returns the sample rate together with the sample data.
	rate, data = wavfile.read(filename)

	# Report the length of the sample data. The call form of print works on both
	# Python 2 and Python 3, unlike the bare `print x` statement.
	print(len(data))
	online2-wav-nnet3-latgen-faster \
	--online=false \
	--do-endpointing=false \
	--frame-subsampling-factor=3 \
	--config=new/conf/online.conf \
	--max-active=7000 \
	--beam=15.0 \
	--lattice-beam=6.0 \
	--acoustic-scale=1.0 \
	--word-symbol-table=new/graph/words.txt \
	#!/bin/sh
	# Run this script from the $KALDI_HOME/egs/aspire/s5/ directory. It also assumes
	# that words.dic and lm.arpa are present in the following directory:
	# $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict

	# First create the directories required by the new model.
	for new_dir in new/local/dict new/local/lang; do
		mkdir -p "$new_dir"
	done

	# Let's copy some data files.
	#!/bin/sh
	# Build the language model lm.arpa from corpus.txt with ngram-count.

	# Upper-case the corpus. The earlier form used an escaped pipe (\|), which
	# hands a literal "|" and the tr arguments to cat as file names, so nothing
	# was upper-cased. Reading corpus.txt through a redirection also makes this
	# step fail when the corpus is missing, so we stop instead of training on an
	# empty corpus_upper.txt.
	tr '[:lower:]' '[:upper:]' < corpus.txt > corpus_upper.txt || exit 1

	# Order-3 model limited to the vocabulary in words.txt, with "<unk>" as the
	# unknown-word token; the result is written to lm.arpa.
	ngram-count -text corpus_upper.txt -order 3 -limit-vocab -vocab words.txt -unk -map-unk "<unk>" -kndiscount -interpolate -lm lm.arpa
	#!/bin/sh
	# A g2p model generates the pronunciation of words once it has been trained, but
	# training needs at least some word-to-pronunciation mappings. For our purpose we
	# can use the cmudict already provided in Kaldi in the
	# $KALDI_HOME/egs/aspire/s5/data/local/dict/cmudict/sphinxdict directory.
	# Run this script from that directory.

	# Train on cmudict_SPHINX_40 (with --devel 5%) and write the result to model-1.
	# The command is run once only: repeating it with identical arguments just
	# retrains and overwrites the same model-1 file.
	g2p.py --train cmudict_SPHINX_40 --devel 5% --write-model model-1

	# Test model-1 against the same dictionary and save the output to model-1-test.
	g2p.py --model model-1 --test cmudict_SPHINX_40 > model-1-test