This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk the validation set and, for each example, print the source review,
# its reference summary and the summary produced by the decoder.
# seq2text, seq2summary, decode_sequence and max_len_text are defined
# elsewhere in the notebook.
for review_seq, summary_seq in zip(x_val, y_val):
    print("Review:", seq2text(review_seq))
    print("Original summary:", seq2summary(summary_seq))
    # The decoder is fed a batch of one sequence of length max_len_text.
    print("Predicted summary:", decode_sequence(review_seq.reshape(1, max_len_text)))
    print("\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Every entry under the training audio directory is treated as one label.
labels = os.listdir(train_audio_path)

# Find the count of each label (number of .wav files in its folder).
no_of_recordings = []
for label in labels:
    waves = [f for f in os.listdir(os.path.join(train_audio_path, label)) if f.endswith('.wav')]
    no_of_recordings.append(len(waves))

# Plot
# NOTE(review): only the figure is created in this excerpt; the bar plot
# itself is not visible here.
plt.figure(figsize=(30, 5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the duration, in seconds, of every .wav file of every label and
# plot the distribution as a histogram.
duration_of_recordings = []
for label in labels:
    label_dir = os.path.join(train_audio_path, label)
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        sample_rate, samples = wavfile.read(os.path.join(label_dir, wav))
        # duration = number of samples / samples per second
        duration_of_recordings.append(float(len(samples) / sample_rate))

plt.hist(np.array(duration_of_recordings))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
train_audio_path = '../input/tensorflow-speech-recognition-challenge/train/audio/'

all_wave = []
all_label = []
for label in labels:
    print(label)
    label_dir = os.path.join(train_audio_path, label)
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        # Load at 16 kHz, then downsample to 8 kHz.
        samples, sample_rate = librosa.load(os.path.join(label_dir, wav), sr=16000)
        # The sample rates are passed by keyword: librosa >= 0.10 no longer
        # accepts them positionally, and the keyword form also works on
        # older releases.
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=8000)
        # NOTE(review): this excerpt ends here; all_wave and all_label are
        # not appended to in the visible code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.layers import Dense, Dropout, Flatten, Conv1D, Input, MaxPooling1D | |
from keras.models import Model | |
from keras.callbacks import EarlyStopping, ModelCheckpoint | |
from keras import backend as K | |
# Reset Keras' global state before building the model.
K.clear_session()

# Each example is 8000 time steps with a single channel.
inputs = Input(shape=(8000, 1))

# First Conv1D layer: 8 filters, kernel size 13, no padding, stride 1.
conv = Conv1D(8, 13, padding='valid', activation='relu', strides=1)(inputs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(audio):
    """Return the class label the model assigns to a single audio clip.

    Relies on the module-level ``model`` and ``classes`` objects, which are
    defined elsewhere in the notebook.

    Args:
        audio: Array holding exactly 8000 values; it is reshaped to
            ``(1, 8000, 1)`` (a batch of one clip) before being passed to
            ``model.predict``.

    Returns:
        The entry of ``classes`` at the index of the highest predicted
        probability.
    """
    prob = model.predict(audio.reshape(1, 8000, 1))
    index = np.argmax(prob[0])
    return classes[index]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

# Pick one validation clip at random, show its true label, play it and
# print the model's prediction.
index = random.randrange(len(x_val))
samples = x_val[index].ravel()
print("Audio:", classes[np.argmax(y_val[index])])
# An Audio object that is not the last expression of a cell is silently
# discarded, so display it explicitly.
ipd.display(ipd.Audio(samples, rate=8000))
print("Text:", predict(samples))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
filepath = '../input/voice-commands/prateek_voice_v2'
# List the available recordings (the result is shown only when this is the
# last expression of a notebook cell).
os.listdir(filepath)

# Reading the voice commands: load at 16 kHz, then downsample to 8 kHz.
samples, sample_rate = librosa.load(os.path.join(filepath, 'stop.wav'), sr=16000)
# The sample rates are passed by keyword: librosa >= 0.10 no longer accepts
# them positionally, and the keyword form also works on older releases.
samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=8000)
# Display the player explicitly; a bare Audio(...) expression in the middle
# of a cell is never rendered.
ipd.display(ipd.Audio(samples, rate=8000))
predict(samples)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import librosa #for audio processing | |
import IPython.display as ipd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from scipy.io import wavfile #for audio processing | |
import warnings | |
warnings.filterwarnings("ignore") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not", | |
"didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not", | |
"he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", | |
"I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would", | |
"i'd've": "i would have", "i'll": "i will", "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", |