This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the duration (in seconds) of every .wav clip found under each label
# directory, then plot the distribution of those durations as a histogram.
duration_of_recordings = []
for label in labels:
    label_dir = os.path.join(train_audio_path, label)
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        sample_rate, samples = wavfile.read(os.path.join(label_dir, wav))
        # Sample count divided by samples-per-second gives the length in seconds.
        duration_of_recordings.append(float(len(samples) / sample_rate))
plt.hist(np.array(duration_of_recordings))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk every label directory, load each .wav clip at 16 kHz and resample it
# to 8 kHz.
train_audio_path = '../input/tensorflow-speech-recognition-challenge/train/audio/'
all_wave = []
all_label = []
for label in labels:
    print(label)
    label_dir = os.path.join(train_audio_path, label)
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        samples, sample_rate = librosa.load(os.path.join(label_dir, wav), sr=16000)
        # Pass the rates by keyword: recent librosa releases no longer accept
        # them positionally, and the keyword names work on older releases too.
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=8000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import LabelEncoder

# Encode the string labels in `all_label` as integer class ids.
le = LabelEncoder()
y = le.fit_transform(all_label)
# `classes[i]` is the original label string for encoded id `i`.
classes = list(le.classes_)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.layers import Dense, Dropout, Flatten, Conv1D, Input, MaxPooling1D
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

# Reset any Keras state left over from a previous run before building the model.
K.clear_session()

# Each input is a sequence of 8000 values with a single channel.
inputs = Input(shape=(8000, 1))

# First Conv1D layer: 8 filters, kernel size 13, no padding, stride 1.
conv = Conv1D(8, 13, padding='valid', activation='relu', strides=1)(inputs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(audio):
    """Return the predicted class label for a single audio clip.

    ``audio`` is reshaped to ``(1, 8000, 1)`` before being passed to
    ``model.predict``, so it must contain exactly 8000 values; any other
    size makes the reshape raise ``ValueError``.

    The returned value is the entry of the module-level ``classes`` list at
    the index of the highest score in the model output.
    """
    prob = model.predict(audio.reshape(1, 8000, 1))
    index = np.argmax(prob[0])
    return classes[index]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

# Pick one random validation clip and compare its true label with the
# model's prediction.
index = random.randint(0, len(x_val) - 1)
samples = x_val[index].ravel()
print("Audio:", classes[np.argmax(y_val[index])])
# A bare expression is only rendered when it is the last one in a notebook
# cell, so the player has to be displayed explicitly here.
ipd.display(ipd.Audio(samples, rate=8000))
print("Text:", predict(samples))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sounddevice as sd
import soundfile as sf

# Record `duration` seconds of single-channel audio at `samplerate` Hz.
samplerate = 16000
duration = 1  # seconds
filename = 'yes.wav'
print("start")
# blocking=True: the call is asked to wait for the recording before returning,
# so "end" is printed afterwards.
mydata = sd.rec(int(samplerate * duration), samplerate=samplerate,
                channels=1, blocking=True)
print("end")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
filepath = '../input/voice-commands/prateek_voice_v2'
os.listdir(filepath)
# reading the voice commands
samples, sample_rate = librosa.load(os.path.join(filepath, 'stop.wav'), sr=16000)
# Pass the rates by keyword: recent librosa releases no longer accept them
# positionally, and the keyword names work on older releases too.
samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=8000)
# Display explicitly: a bare expression that is not the last one in a
# notebook cell is never rendered.
ipd.display(ipd.Audio(samples, rate=8000))
predict(samples)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
df=pd.DataFrame(columns=['energy','start','end']) | |
thresh=12000 | |
row_index=0 | |
for i in range(len(energy)): | |
value=energy[i] | |
if(value>=thresh): | |
i=np.where(energy == value)[0] | |
df.loc[row_index,'energy']=value | |
df.loc[row_index,'start']=i[0] * 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
temp=[] | |
i=0 | |
j=0 | |
n=len(df) - 2 | |
m=len(df) - 1 | |
while(i<=n): | |
j=i+1 | |
while(j<=m): | |
if(df['end'][i] == df['start'][j]): | |
df.loc[i,'end'] = df.loc[j,'end'] |