# denisb411/conv_net_sound_classification.py

## conv_net_sound_classification.py
import os
import librosa.display

import numpy as np
import pandas as pd
import tensorflow as tf

# ---------------------------------------------------------------------------
# Load the raw samples and turn each one into a spectrogram "image".
# ---------------------------------------------------------------------------
# Every CSV row is one sample; the last column is its class label.
df = pd.read_csv('samples_nylonGuitar_1024_Mm7_R03.csv')

# np.float64 replaces the `np.float` alias, which was deprecated in NumPy 1.20
# and removed in 1.24 (it was only ever the builtin float, i.e. float64).
X_load = np.array(df.iloc[:, :-1], dtype=np.float64)
y_load = np.array(df.iloc[:, -1], dtype=np.float64)

processedData_path = "preprocessedSamples_spect.data"
# One (256, 16, 1) array per sample. n_fft=511 yields 1 + 511 // 2 = 256
# frequency bins; the 16 time frames depend on the sample length and hop
# size -- TODO confirm against the CSV.
processedX = np.zeros((len(X_load), 256, 16, 1), dtype=np.float64)
processedy = np.array(y_load, dtype=np.float64)  # independent copy of the labels

for i, raw_sample in enumerate(X_load):
    # NOTE(review): an STFT is complex-valued while dtype=np.float32 requests a
    # real output -- presumably only the real part survives. Confirm that this,
    # rather than the magnitude, is the intended feature.
    sample = librosa.core.stft(y=raw_sample, n_fft=511, hop_length=None,
                               win_length=256, window='hamming', center=True,
                               dtype=np.float32, pad_mode='reflect')
    # atleast_3d appends the trailing channel axis: (256, 16) -> (256, 16, 1).
    processedX[i] = np.atleast_3d(sample)

print(processedX[0].shape)

from sklearn.utils import shuffle
# Shuffle features and labels together so the tail-based split below is random.
shufled_processedX, shufled_processedy = shuffle(processedX, processedy)

# Shift the labels down by one (presumably they are 1-based in the CSV) so the
# class ids start at 0, as the sparse softmax loss requires.
shufled_processedy -= 1

# Hold out the last 2000 shuffled samples: 1000 for validation, 1000 for test.
X_train = np.array(shufled_processedX[:-2000], dtype=np.float64)
y_train = np.array(shufled_processedy[:-2000], dtype=np.float64)

X_valid = np.array(shufled_processedX[-2000:-1000], dtype=np.float64)
y_valid = np.array(shufled_processedy[-2000:-1000], dtype=np.float64)

X_test = np.array(shufled_processedX[-1000:], dtype=np.float64)
y_test = np.array(shufled_processedy[-1000:], dtype=np.float64)
print(y_test[999])
print(X_test[999])

print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)

# Number of distinct classes present in the data.
n_outputs = len(np.unique(shufled_processedy))
print(np.unique(shufled_processedy))

# ---------------------------------------------------------------------------
# Graph definition (TensorFlow 1.x API): conv -> pool -> conv -> pool -> fc.
# ---------------------------------------------------------------------------
import tensorflow as tf

height = 256
width = 16
channels = 1
n_inputs = height * width
n_outputs = len(np.unique(processedy))

with tf.name_scope("inputs"):
    # Samples are fed flattened and reshaped back to images inside the graph.
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    # Defaults to False so evaluation runs do not apply dropout.
    training = tf.placeholder_with_default(False, shape=[], name='training')

# input:  [batch_size, 256, 16, 1]
# output: [batch_size, 256, 16, 32]  ("SAME" padding, stride 1)
conv1_fmaps = 32  # filters
conv1_ksize = [32, 3]
conv1_stride = 1
conv1_pad = "SAME"
conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")

# input:  [batch_size, 256, 16, 32]
# output: [batch_size, 128, 8, 32]
pool1_fmaps = conv1_fmaps
with tf.name_scope("pool1"):
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)


# input:  [batch_size, 128, 8, 32]
# output: [batch_size, 128, 8, 64]
conv2_fmaps = 64
conv2_ksize = [16, 2]
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25
conv2 = tf.layers.conv2d(pool1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

# input:   [batch_size, 128, 8, 64]
# output1: [batch_size, 64, 4, 64]
# output2: [batch_size, 64 * 4 * 64]
pool2_fmaps = conv2_fmaps
with tf.name_scope("pool2"):
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Take the flattened size from the tensor's static shape. A hard-coded
    # value that is too small still reshapes without error, but it folds
    # spatial positions into the batch axis, so logits and labels would end
    # up with different first dimensions.
    pool2_flat_dim = int(np.prod(pool2.get_shape().as_list()[1:]))
    pool2_flat = tf.reshape(pool2, shape=[-1, pool2_flat_dim])
    pool2_flat_drop = tf.layers.dropout(pool2_flat, conv2_dropout_rate, training=training)

n_fc1 = 512
fc1_dropout_rate = 0.4
with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool2_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
    fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)

with tf.name_scope("output"):
    logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    # Sparse variant: labels are integer class ids, not one-hot vectors.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction is correct when the true class has the highest logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()


# ---------------------------------------------------------------------------
# Training with early stopping, then evaluation on the held-out test set.
# ---------------------------------------------------------------------------
def get_model_params():
    """Return a ``{variable name: value}`` snapshot of all global variables.

    Must be called while a default session is active.
    """
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    values = tf.get_default_session().run(gvars)
    return {gvar.op.name: value for gvar, value in zip(gvars, values)}


def restore_model_params(model_params):
    """Load a snapshot produced by ``get_model_params`` back into the graph.

    Each variable's existing ``<name>/Assign`` op is re-run with the saved
    value fed in place of its initial value, so no new ops are added to the
    graph. Must be called while a default session is active.
    """
    graph = tf.get_default_graph()
    assign_ops = {name: graph.get_operation_by_name(name + "/Assign")
                  for name in model_params}
    feed_dict = {assign_ops[name].inputs[1]: value
                 for name, value in model_params.items()}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)


n_epochs = 1000
batch_size = 40

best_loss_val = np.inf  # np.infty was removed in NumPy 2.0
check_interval = 10  # validate every 10 training batches
checks_since_last_progress = 0
max_checks_without_progress = 20
best_model_params = None

# The X placeholder takes flattened samples and y takes int32 class ids;
# these conversions do not change between iterations, so do them once.
X_valid_reshaped = np.reshape(X_valid, (len(X_valid), -1))
y_valid_int = y_valid.astype(np.int32)
X_test_reshaped = np.reshape(X_test, (len(X_test), -1))
y_test_int = y_test.astype(np.int32)

# array_split raises on zero sections, so always use at least one batch.
n_batches = max(1, len(X_train) // batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for idx, rnd_indices in enumerate(np.array_split(rnd_idx, n_batches)):
            X_batch = X_train[rnd_indices]
            y_batch = y_train[rnd_indices].astype(np.int32)
            X_batch_reshaped = np.reshape(X_batch, (len(X_batch), -1))
            sess.run(training_op,
                     feed_dict={X: X_batch_reshaped, y: y_batch, training: True})
            if idx % check_interval == 0:
                loss_val = loss.eval(feed_dict={X: X_valid_reshaped,
                                                y: y_valid_int})
                print(loss_val)
                if loss_val < best_loss_val:
                    # New best validation loss: remember these weights.
                    best_loss_val = loss_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
        # Training accuracy is measured on the last mini-batch of the epoch.
        acc_train = accuracy.eval(feed_dict={X: X_batch_reshaped, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid_reshaped,
                                           y: y_valid_int})
        print("Epoch {}, train accuracy: {:.4f}%, valid. accuracy: {:.4f}%, valid. best loss: {:.6f}".format(
                  epoch, acc_train * 100, acc_val * 100, best_loss_val))
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break

    # Evaluate and save the best weights seen, not the last ones.
    if best_model_params:
        restore_model_params(best_model_params)
    acc_test = accuracy.eval(feed_dict={X: X_test_reshaped,
                                        y: y_test_int})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_model")
	# ---- Second copy of the pipeline, with block indentation restored ----
# NOTE(review): this section repeats the whole script above (load data, build
# the graph, train, evaluate, save). It looks like an accidental paste --
# confirm and delete one of the two copies. As written it re-runs the full
# pipeline on a fresh graph and overwrites "./my_model".
import os
import librosa.display

import numpy as np
import pandas as pd
import tensorflow as tf

# Every CSV row is one sample; the last column is its class label.
df = pd.read_csv('samples_nylonGuitar_1024_Mm7_R03.csv')

# np.float64 replaces the `np.float` alias removed in NumPy 1.24.
X_load = np.array(df.iloc[:, :-1], dtype=np.float64)
y_load = np.array(df.iloc[:, -1], dtype=np.float64)

processedData_path = "preprocessedSamples_spect.data"
# One (256, 16, 1) array per sample. n_fft=511 yields 256 frequency bins; the
# 16 time frames depend on the sample length -- TODO confirm against the CSV.
processedX = np.zeros((len(X_load), 256, 16, 1), dtype=np.float64)
processedy = np.array(y_load, dtype=np.float64)

for i, raw_sample in enumerate(X_load):
    # NOTE(review): dtype=np.float32 requests a real output from a
    # complex-valued transform -- presumably only the real part is kept.
    sample = librosa.core.stft(y=raw_sample, n_fft=511, hop_length=None,
                               win_length=256, window='hamming', center=True,
                               dtype=np.float32, pad_mode='reflect')
    # atleast_3d appends the trailing channel axis.
    processedX[i] = np.atleast_3d(sample)

print(processedX[0].shape)

from sklearn.utils import shuffle
shufled_processedX, shufled_processedy = shuffle(processedX, processedy)

# Shift labels down by one (presumably 1-based in the CSV) so ids start at 0.
shufled_processedy -= 1

# Hold out the last 2000 shuffled samples: 1000 for validation, 1000 for test.
X_train = np.array(shufled_processedX[:-2000], dtype=np.float64)
y_train = np.array(shufled_processedy[:-2000], dtype=np.float64)

X_valid = np.array(shufled_processedX[-2000:-1000], dtype=np.float64)
y_valid = np.array(shufled_processedy[-2000:-1000], dtype=np.float64)

X_test = np.array(shufled_processedX[-1000:], dtype=np.float64)
y_test = np.array(shufled_processedy[-1000:], dtype=np.float64)
print(y_test[999])
print(X_test[999])

print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)

n_outputs = len(np.unique(shufled_processedy))
print(np.unique(shufled_processedy))

import tensorflow as tf

# Start from an empty graph: layers named "conv1", "conv2", "fc1" and "output"
# may already exist in the default graph, and creating them again there would
# raise a "variable already exists" error.
tf.reset_default_graph()

height = 256
width = 16
channels = 1
n_inputs = height * width
n_outputs = len(np.unique(processedy))

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    training = tf.placeholder_with_default(False, shape=[], name='training')

# input:  [batch_size, 256, 16, 1]
# output: [batch_size, 256, 16, 32]
conv1_fmaps = 32  # filters
conv1_ksize = [32, 3]
conv1_stride = 1
conv1_pad = "SAME"
conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")

# input:  [batch_size, 256, 16, 32]
# output: [batch_size, 128, 8, 32]
pool1_fmaps = conv1_fmaps
with tf.name_scope("pool1"):
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)


# input:  [batch_size, 128, 8, 32]
# output: [batch_size, 128, 8, 64]
conv2_fmaps = 64
conv2_ksize = [16, 2]
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25
conv2 = tf.layers.conv2d(pool1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

# input:   [batch_size, 128, 8, 64]
# output1: [batch_size, 64, 4, 64]
# output2: [batch_size, 64 * 4 * 64]
pool2_fmaps = conv2_fmaps
with tf.name_scope("pool2"):
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Take the flattened size from the static shape; a hard-coded value that
    # is too small would fold spatial positions into the batch axis.
    pool2_flat_dim = int(np.prod(pool2.get_shape().as_list()[1:]))
    pool2_flat = tf.reshape(pool2, shape=[-1, pool2_flat_dim])
    pool2_flat_drop = tf.layers.dropout(pool2_flat, conv2_dropout_rate, training=training)

n_fc1 = 512
fc1_dropout_rate = 0.4
with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool2_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
    fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)

with tf.name_scope("output"):
    logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()


def get_model_params():
    """Return a ``{variable name: value}`` snapshot of all global variables.

    Must be called while a default session is active.
    """
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    values = tf.get_default_session().run(gvars)
    return {gvar.op.name: value for gvar, value in zip(gvars, values)}


def restore_model_params(model_params):
    """Load a snapshot produced by ``get_model_params`` back into the graph.

    Each variable's existing ``<name>/Assign`` op is re-run with the saved
    value fed in place of its initial value. Must be called while a default
    session is active.
    """
    graph = tf.get_default_graph()
    assign_ops = {name: graph.get_operation_by_name(name + "/Assign")
                  for name in model_params}
    feed_dict = {assign_ops[name].inputs[1]: value
                 for name, value in model_params.items()}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)


n_epochs = 1000
batch_size = 40

best_loss_val = np.inf  # np.infty was removed in NumPy 2.0
check_interval = 10
checks_since_last_progress = 0
max_checks_without_progress = 20
best_model_params = None

# Flatten the evaluation sets and cast their labels once, outside the loop.
X_valid_reshaped = np.reshape(X_valid, (len(X_valid), -1))
y_valid_int = y_valid.astype(np.int32)
X_test_reshaped = np.reshape(X_test, (len(X_test), -1))
y_test_int = y_test.astype(np.int32)

# array_split raises on zero sections, so always use at least one batch.
n_batches = max(1, len(X_train) // batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train))
        for idx, rnd_indices in enumerate(np.array_split(rnd_idx, n_batches)):
            X_batch = X_train[rnd_indices]
            y_batch = y_train[rnd_indices].astype(np.int32)
            X_batch_reshaped = np.reshape(X_batch, (len(X_batch), -1))
            sess.run(training_op,
                     feed_dict={X: X_batch_reshaped, y: y_batch, training: True})
            if idx % check_interval == 0:
                loss_val = loss.eval(feed_dict={X: X_valid_reshaped,
                                                y: y_valid_int})
                print(loss_val)
                if loss_val < best_loss_val:
                    best_loss_val = loss_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
        # Training accuracy is measured on the last mini-batch of the epoch.
        acc_train = accuracy.eval(feed_dict={X: X_batch_reshaped, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid_reshaped,
                                           y: y_valid_int})
        print("Epoch {}, train accuracy: {:.4f}%, valid. accuracy: {:.4f}%, valid. best loss: {:.6f}".format(
                  epoch, acc_train * 100, acc_val * 100, best_loss_val))
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break

    # Evaluate and save the best weights seen, not the last ones.
    if best_model_params:
        restore_model_params(best_model_params)
    acc_test = accuracy.eval(feed_dict={X: X_test_reshaped,
                                        y: y_test_int})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_model")