Created
October 2, 2019 04:51
-
-
Save bravo325806/bc9455fdea870f5d8723440298616d80 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa | |
import os | |
import re | |
import sys | |
import wave | |
import numpy as np | |
import tensorflow as tf | |
from tensorflow.contrib import rnn | |
from random import shuffle | |
from tensorflow.python.tools import freeze_graph | |
from tensorflow.python.tools import optimize_for_inference_lib | |
# --- Network geometry ------------------------------------------------------
n_input = 20       # last dimension of the input placeholder (features per time step)
n_steps = 432      # time steps per clip; shorter MFCC matrices are zero-padded to this
n_hidden = 256     # number of units in the LSTM cell
n_classes = 2      # width of the one-hot labels and of the output layer

# --- Training schedule -----------------------------------------------------
learning_rate = 0.001   # passed to the Adam optimizer
training_iters = 100    # bound of the training loop
batch_size = 32         # clips per batch requested from the generator
display_step = 1        # print loss/accuracy every `display_step` steps

# --- Dataset ---------------------------------------------------------------
path = "./dataset/train"            # root directory walked by get_list()
class_label = ["mayday", "twice"]   # position in this list is the class index
train_set = []                      # filled by get_list() with {'path', 'label'} dicts

# --- Export names ----------------------------------------------------------
MODEL_NAME = "voice"            # stem of the files written under out/
input_node_name = "input"       # graph name given to the input placeholder
output_node_name = "output"     # graph name given to the softmax output
def get_list():
    """Append every file found under ``path`` to the module-level ``train_set``.

    The directory tree rooted at ``path`` is walked recursively. Each file
    becomes a dict ``{'path': <file path>, 'label': <name of the directory
    that directly contains the file>}``. Files sitting directly in ``path``
    therefore get the last component of ``path`` itself as their label.

    Entries are appended, so calling this function more than once adds the
    same files again.

    Returns:
        The module-level ``train_set`` list (the same object that was
        mutated).
    """
    for root, _dirs, files in os.walk(path):
        # os.walk builds ``root`` with the platform separator, so derive the
        # label and the file path with os.path rather than assuming "/".
        label = os.path.basename(root)
        for file_name in files:
            train_set.append({'path': os.path.join(root, file_name), 'label': label})
    return train_set
def _fit_to_steps(mfcc, steps):
    """Return ``mfcc`` zero-padded or truncated along axis 1 to ``steps`` columns."""
    frames = mfcc.shape[1]
    if frames >= steps:
        # np.pad rejects negative widths, so over-long clips are cut instead.
        return mfcc[:, :steps]
    return np.pad(mfcc, ((0, 0), (0, steps - frames)), mode='constant', constant_values=0)


def mfcc_batch_generator(batch_size=32):
    """Yield endless ``(features, labels)`` batches built from ``train_set``.

    For every entry of the module-level ``train_set`` the audio file is
    loaded with librosa, converted to an MFCC matrix, fitted to exactly
    ``n_steps`` columns (zero-padded when shorter, truncated when longer)
    and transposed so that time is the first axis. ``train_set`` is
    reshuffled in place at the start of every pass; a partially filled batch
    is carried over into the next pass rather than dropped.

    Args:
        batch_size: Number of clips collected before a batch is yielded.

    Yields:
        A tuple ``(features, labels)`` of NumPy arrays: ``features`` stacks
        the transposed MFCC matrices (each ``n_steps`` rows), ``labels``
        stacks the matching one-hot vectors of width ``n_classes``.

    Raises:
        ValueError: If ``train_set`` is empty when iteration starts, or if an
            entry's label is not in ``class_label`` (from ``list.index``).
    """
    if not train_set:
        # Without this guard the ``while True`` below would spin forever
        # without ever reaching a ``yield``.
        raise ValueError("train_set is empty; call get_list() on a non-empty dataset first")

    batch_features = []
    labels = []
    while True:
        shuffle(train_set)
        for file in train_set:
            # Named ``signal`` so the imported ``wave`` module is not shadowed.
            signal, sr = librosa.load(file['path'])
            # Keyword arguments work on both old and new librosa releases.
            mfcc = librosa.feature.mfcc(y=signal, sr=sr)
            label = dense_to_one_hot(class_label.index(file['label']), n_classes)
            labels.append(label)
            mfcc = _fit_to_steps(mfcc, n_steps)
            batch_features.append(np.array(mfcc).T)
            if len(batch_features) >= batch_size:
                yield np.array(batch_features), np.array(labels)
                batch_features = []  # Reset for next batch
                labels = []
def dense_to_one_hot(labels_dense, num_classes=2):
    """Convert class index/indices into one-hot row(s).

    Args:
        labels_dense: A class index, or a sequence/array of class indices.
        num_classes: Width of each one-hot vector.

    Returns:
        The row(s) of a ``num_classes`` x ``num_classes`` identity matrix
        selected by ``labels_dense``.
    """
    identity = np.eye(num_classes)
    one_hot = identity[labels_dense]
    return one_hot
def RNN(x, weights, biases):
    """Build the LSTM classifier graph and return its un-normalised logits.

    Args:
        x: Input tensor whose axis 1 has ``n_steps`` entries (one per time
            step).
        weights: Dict holding the output projection matrix under ``'out'``.
        biases: Dict holding the output bias vector under ``'out'``.

    Returns:
        The tensor ``outputs[-1] @ weights['out'] + biases['out']``, where
        ``outputs[-1]`` is the LSTM output at the final time step.
    """
    # static_rnn takes a Python list with one tensor per time step, so the
    # input is split along axis 1 into n_steps pieces.
    step_inputs = tf.unstack(x, n_steps, 1)
    lstm_cell = tf.nn.rnn_cell.LSTMCell(
        n_hidden, forget_bias=1.0, reuse=tf.get_variable_scope().reuse)
    outputs, _ = rnn.static_rnn(lstm_cell, step_inputs, dtype=tf.float32)
    # Only the last time step feeds the linear output layer.
    return tf.add(tf.matmul(outputs[-1], weights['out']), biases['out'])
def save_graph_to_file(sess, path):
    """Write the session's graph, with variables turned into constants, to ``path``.

    Args:
        sess: Session whose ``graph_def`` and current variable values are
            exported.
        path: Destination file, opened in binary write mode.

    The exported graph is the one returned by
    ``convert_variables_to_constants`` for the node named by the module-level
    ``output_node_name``.
    """
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess,
        sess.graph_def,
        [output_node_name],
    )
    with tf.gfile.FastGFile(path, 'wb') as out_file:
        payload = frozen_graph.SerializeToString()
        out_file.write(payload)
# Collect the training files first so the batch generator has data to draw on.
get_list()

# Graph inputs: a batch of (n_steps, n_input) feature matrices and the
# matching one-hot labels.
x = tf.placeholder(dtype=tf.float32, shape=[None, n_steps, n_input], name=input_node_name)
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

# Parameters of the linear layer applied to the last LSTM output.
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}

pred = RNN(x, weights, biases)
# Named so save_graph_to_file() can find the node when freezing the graph.
outputs = tf.nn.softmax(pred, name=output_node_name)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

tf.summary.scalar("loss", cost)
tf.summary.scalar("accuracy", accuracy)
merged_summary_op = tf.summary.merge_all()

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    tf.train.write_graph(sess.graph_def, 'out', MODEL_NAME + '.pbtxt', True)
    summary_writer = tf.summary.FileWriter('logs/', graph=tf.get_default_graph())

    # Create the generator once: building a fresh one per step would reshuffle
    # the dataset every time and throw away the generator's position.
    batch = mfcc_batch_generator(batch_size)
    try:
        # Run exactly `training_iters` steps, numbered from 1.
        for step in range(1, training_iters + 1):
            batch_x, batch_y = next(batch)
            feed = {x: batch_x, y: batch_y}
            # One run call per step: a single optimizer update, with the
            # summary, loss and accuracy fetched in that same call.
            _, summary, loss, acc = sess.run(
                [optimizer, merged_summary_op, cost, accuracy], feed_dict=feed)
            summary_writer.add_summary(summary, step)
            if step % display_step == 0:
                print("Iter " + str(step) + ", Minibatch Loss = " +
                      "{:.6f}".format(loss) + ", Training Accuracy = " +
                      "{:.5f}".format(acc))
    finally:
        # Flush pending summary events to disk even if training fails.
        summary_writer.close()

    saver.save(sess, 'out/' + MODEL_NAME + '.ckpt')
    save_graph_to_file(sess, 'out/' + MODEL_NAME + '.pb')
    print("Optimization Finished!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment