Skip to content

Instantly share code, notes, and snippets.

@alhoo
Last active February 1, 2019 15:58
Show Gist options
  • Save alhoo/fad37ea417cdd3baf34b0b3091e3545b to your computer and use it in GitHub Desktop.
Save alhoo/fad37ea417cdd3baf34b0b3091e3545b to your computer and use it in GitHub Desktop.
Predict next audio frame
"""
!pip install matplotlib scipy numpy keras python_speech_features
"""
import scipy
import scipy.io.wavfile
from python_speech_features import mfcc
from matplotlib import pyplot as plt
import numpy as np
from keras.layers import Dense, Input, CuDNNLSTM, TimeDistributed
from keras.models import Sequential
def datagen(data, batch_size=16, history_length=10):
    """Yield shuffled (X, y) batches for next-frame prediction.

    Each window start ``p`` contributes one sample: the input is
    ``data[p:p + history_length]`` and the target is the same window moved
    one frame forward, ``data[p + 1:p + 1 + history_length]``.

    The window starts are shuffled once. Batches are then cut from the
    shuffled starts with a window that advances one position per step, so
    consecutive batches share ``batch_size - 1`` samples. The generator makes
    a single pass and then stops.

    Args:
        data: 2-D array indexed as ``data[frame, feature]``.
        batch_size: number of windows per batch.
        history_length: number of frames per window.

    Yields:
        Tuple ``(X, y)`` of arrays, each shaped
        ``(batch_size, history_length, data.shape[1])``.
    """
    # The largest start is len(data) - history_length - 1, which leaves room
    # for the target window that is shifted one frame forward.
    starts = np.arange(len(data) - history_length)
    np.random.shuffle(starts)
    # "+ 1" so the final full batch is produced too; with fewer starts than
    # batch_size the range is empty and nothing is yielded.
    for i in range(len(starts) - batch_size + 1):
        batch = starts[i:i + batch_size]
        X = np.stack([data[p:p + history_length, :] for p in batch])
        y = np.stack([data[p + 1:p + 1 + history_length, :] for p in batch])
        yield X, y
def get_model(history_length=10, feature_width=13):
    """Build and compile the sequence-to-sequence frame predictor.

    The network takes samples shaped ``(history_length, feature_width)`` and,
    because both recurrent layers use ``return_sequences=True``, emits
    ``feature_width`` values for every time step.

    Args:
        history_length: number of frames in each input window.
        feature_width: number of features per frame (input and output).

    Returns:
        A compiled Keras ``Sequential`` model using mean squared error loss
        and the Adam optimizer.
    """
    # NOTE(review): CuDNNLSTM is the cuDNN-backed LSTM, presumably GPU-only;
    # confirm against the installed Keras version.
    stack_of_layers = [
        Dense(64, input_shape=(history_length, feature_width)),
        CuDNNLSTM(128, return_sequences=True),
        CuDNNLSTM(128, return_sequences=True),
        TimeDistributed(Dense(64, activation="relu")),
        Dense(feature_width, activation="linear"),
    ]
    network = Sequential()
    for layer in stack_of_layers:
        network.add(layer)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network
# Hyperparameters shared by the batch generator and the model.
# NOTE(review): feature_width must equal the number of coefficients mfcc()
# returns per frame; 13 is presumably the library default — confirm.
history_length = 10
feature_width = 13
batch_size = 16

# scipy.io.wavfile.read returns (sample_rate, samples).
wavfile = scipy.io.wavfile.read('/var/data/Data/audio/output_201812102202.wav')
sample_rate, samples = wavfile
# Use the first channel; a mono file is already 1-D and has no channel axis.
channel1 = samples if samples.ndim == 1 else samples[:, 0]
data = mfcc(channel1, sample_rate)

model = get_model(history_length, feature_width)
# NOTE(review): datagen makes a single pass over the data, so the recording
# must provide at least steps_per_epoch batches — confirm it is long enough.
model.fit_generator(datagen(data, batch_size, history_length), steps_per_epoch=1000000, epochs=1)

# Draw one fresh batch and show the prediction next to its target. Separate
# axes are needed: two imshow calls on the same axes would leave only the
# second image visible.
X, y = next(datagen(data, batch_size, history_length))
fig, (ax_pred, ax_true) = plt.subplots(1, 2)
ax_pred.imshow(model.predict(X)[-1])
ax_pred.set_title("predicted")
ax_true.imshow(y[-1])
ax_true.set_title("target")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment