# alhoo/audio_predict.py

## audio_predict.py
"""
!pip install matplotlib scipy numpy keras python_speech_features
"""

import scipy
import scipy.io.wavfile
from python_speech_features import mfcc
from matplotlib import pyplot as plt
import numpy as np
from keras.layers import Dense, Input, CuDNNLSTM, TimeDistributed
from keras.models import Sequential

def datagen(data, batch_size=16, history_length=10):
    """Yield shuffled (X, y) batches for next-frame prediction.

    Every sample is a window of ``history_length`` consecutive rows of
    ``data``; its target is the same window shifted forward by one row.
    The generator is finite: it stops once every batch-sized run of the
    shuffled window starts has been produced.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        batch_size: Number of windows stacked into each batch.
        history_length: Number of consecutive rows in each window.

    Yields:
        Tuples ``(X, y)`` of arrays, each of shape
        ``(batch_size, history_length, data.shape[1])``.
    """
    # A start p is valid when the target window, which ends at row
    # p + 1 + history_length, still fits inside data.
    starts = np.arange(len(data) - history_length)
    np.random.shuffle(starts)
    # Slide a batch-sized window over the shuffled starts one step at a
    # time; the "+ 1" keeps the final full batch, which would otherwise
    # be dropped.
    for i in range(len(starts) - batch_size + 1):
        picked = starts[i:i + batch_size]
        X = np.stack([data[p:p + history_length, :] for p in picked])
        y = np.stack([data[p + 1:p + 1 + history_length, :] for p in picked])
        yield X, y

def get_model(history_length=10, feature_width=13):
    """Build and compile the sequence regression network.

    The layer stack is, in order: a Dense(64) on the input, two
    CuDNNLSTM(128) layers that return full sequences, a time-distributed
    Dense(64) with ReLU activation, and a linear Dense projecting to
    ``feature_width`` units.

    Args:
        history_length: Number of time steps in each input window.
        feature_width: Number of features per time step; also the width
            of the final layer.

    Returns:
        The ``Sequential`` model, compiled with mean-squared-error loss
        and the Adam optimizer.
    """
    layers = [
        Dense(64, input_shape=(history_length, feature_width)),
        CuDNNLSTM(128, return_sequences=True),
        CuDNNLSTM(128, return_sequences=True),
        TimeDistributed(Dense(64, activation="relu")),
        Dense(feature_width, activation="linear"),
    ]
    net = Sequential(layers)
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net

# Window length (in MFCC frames), features per frame, and windows per batch.
# NOTE(review): feature_width presumably has to equal the number of
# coefficients mfcc() returns per frame -- confirm against its defaults.
history_length=10
feature_width=13
batch_size=16

# scipy.io.wavfile.read returns a (sample_rate, samples) pair.
wavfile = scipy.io.wavfile.read('/var/data/Data/audio/output_201812102202.wav')
sample_rate, samples = wavfile
# Use the first channel; a mono file is read as a 1-D array with no channel
# axis, so indexing [:, 0] on it would fail.
channel1 = samples[:, 0] if samples.ndim > 1 else samples
data = mfcc(channel1, sample_rate)

model = get_model(history_length, feature_width)

# datagen is a finite generator, so asking Keras for more steps than it can
# yield exhausts it mid-epoch. Cap the step count at the number of batches
# one pass over the data provides (the original 1000000 stays as the ceiling).
available_steps = len(data) - history_length - batch_size
steps = max(1, min(1000000, available_steps))
model.fit_generator(datagen(data, batch_size, history_length), steps_per_epoch=steps, epochs=1)

X, y = next(datagen(data))
# Draw prediction and target on separate axes; two imshow calls on the same
# axes would leave only the second image visible.
fig, (ax_pred, ax_true) = plt.subplots(1, 2)
ax_pred.imshow(model.predict(X)[-1])
ax_pred.set_title("predicted")
ax_true.imshow(y[-1])
ax_true.set_title("target")
plt.show()
	"""
	!pip install matplotlib scipy numpy keras python_speech_features
	"""

	import scipy
	import scipy.io.wavfile
	from python_speech_features import mfcc
	from matplotlib import pyplot as plt
	import numpy as np
	from keras.layers import Dense, Input, CuDNNLSTM, TimeDistributed
	from keras.models import Sequential

	def datagen(data, batch_size=16, history_length=10):
	    """Yield shuffled (X, y) batches for next-frame prediction.

	    Every sample is a window of ``history_length`` consecutive rows of
	    ``data``; its target is the same window shifted forward by one row.
	    The generator is finite: it stops once every batch-sized run of the
	    shuffled window starts has been produced.

	    Args:
	        data: 2-D array indexed as ``data[row, column]``.
	        batch_size: Number of windows stacked into each batch.
	        history_length: Number of consecutive rows in each window.

	    Yields:
	        Tuples ``(X, y)`` of arrays, each of shape
	        ``(batch_size, history_length, data.shape[1])``.
	    """
	    # A start p is valid when the target window, which ends at row
	    # p + 1 + history_length, still fits inside data.
	    starts = np.arange(len(data) - history_length)
	    np.random.shuffle(starts)
	    # Slide a batch-sized window over the shuffled starts one step at a
	    # time; the "+ 1" keeps the final full batch, which would otherwise
	    # be dropped.
	    for i in range(len(starts) - batch_size + 1):
	        picked = starts[i:i + batch_size]
	        X = np.stack([data[p:p + history_length, :] for p in picked])
	        y = np.stack([data[p + 1:p + 1 + history_length, :] for p in picked])
	        yield X, y

	def get_model(history_length=10, feature_width=13):
	    """Build and compile the sequence regression network.

	    The layer stack is, in order: a Dense(64) on the input, two
	    CuDNNLSTM(128) layers that return full sequences, a time-distributed
	    Dense(64) with ReLU activation, and a linear Dense projecting to
	    ``feature_width`` units.

	    Args:
	        history_length: Number of time steps in each input window.
	        feature_width: Number of features per time step; also the width
	            of the final layer.

	    Returns:
	        The ``Sequential`` model, compiled with mean-squared-error loss
	        and the Adam optimizer.
	    """
	    model = Sequential()
	    model.add(Dense(64, input_shape=(history_length, feature_width)))
	    model.add(CuDNNLSTM(128, return_sequences=True))
	    model.add(CuDNNLSTM(128, return_sequences=True))
	    model.add(TimeDistributed(Dense(64, activation="relu")))
	    model.add(Dense(feature_width, activation="linear"))
	    model.compile(loss='mean_squared_error', optimizer='adam')
	    return model

	# Window length (in MFCC frames), features per frame, and windows per batch.
	# NOTE(review): feature_width presumably has to equal the number of
	# coefficients mfcc() returns per frame -- confirm against its defaults.
	history_length=10
	feature_width=13
	batch_size=16

	# scipy.io.wavfile.read returns a (sample_rate, samples) pair.
	wavfile = scipy.io.wavfile.read('/var/data/Data/audio/output_201812102202.wav')
	sample_rate, samples = wavfile
	# Use the first channel; a mono file is read as a 1-D array with no channel
	# axis, so indexing [:, 0] on it would fail.
	channel1 = samples[:, 0] if samples.ndim > 1 else samples
	data = mfcc(channel1, sample_rate)

	model = get_model(history_length, feature_width)

	# datagen is a finite generator, so asking Keras for more steps than it can
	# yield exhausts it mid-epoch. Cap the step count at the number of batches
	# one pass over the data provides (the original 1000000 stays as the ceiling).
	available_steps = len(data) - history_length - batch_size
	steps = max(1, min(1000000, available_steps))
	model.fit_generator(datagen(data, batch_size, history_length), steps_per_epoch=steps, epochs=1)

	X, y = next(datagen(data))
	# Draw prediction and target on separate axes; two imshow calls on the same
	# axes would leave only the second image visible.
	fig, (ax_pred, ax_true) = plt.subplots(1, 2)
	ax_pred.imshow(model.predict(X)[-1])
	ax_pred.set_title("predicted")
	ax_true.imshow(y[-1])
	ax_true.set_title("target")
	plt.show()