Skip to content

Instantly share code, notes, and snippets.

@saurabhvyas
Created October 2, 2017 15:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saurabhvyas/40bea8449fc92b6053294cdb8bff3394 to your computer and use it in GitHub Desktop.
main.py
# coding: utf-8
# In[ ]:
# In[1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
# Hyper Parameters
input_size = 13  # features per timestep; matches the 13 MFCC coefficients produced per frame
hidden_size = 60  # LSTM hidden-state width
num_layers = 2  # stacked LSTM layers
num_classes = 2  # binary classification (two speakers/clips, labels 0 and 1)
batch_size = 1  # NOTE(review): not referenced anywhere in the visible code
num_epochs = 2  # passes over the two training clips
learning_rate = 0.01  # Adam step size
# In[2]:
from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav
# In[3]:
def audio_to_mfcc(fileurl):
    """Read a WAV file and return its MFCC feature matrix.

    Parameters
    ----------
    fileurl : str
        Path to the .wav file on disk.

    Returns
    -------
    numpy.ndarray
        MFCC features from ``python_speech_features.mfcc`` — one row per
        analysis frame, 13 coefficients per row by default.
    """
    sample_rate, signal = wav.read(fileurl)
    return mfcc(signal, sample_rate)
# In[21]:
class RNN(nn.Module):
    """Many-to-one LSTM classifier.

    The hidden state of the final timestep is projected through a single
    linear layer to produce raw class scores (no softmax — the caller's
    CrossEntropyLoss expects unnormalized logits).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True -> input/output tensors are (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map x of shape (batch, seq, input_size) to (batch, num_classes)."""
        batch = x.size(0)
        # Fresh zero initial hidden and cell states for every forward pass.
        h0 = Variable(torch.zeros(self.num_layers, batch, self.hidden_size))
        c0 = Variable(torch.zeros(self.num_layers, batch, self.hidden_size))
        lstm_out, _ = self.lstm(x, (h0, c0))
        # Keep only the last timestep of each sequence, then classify it.
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step)
# In[22]:
# create a many to one LSTM , just take last output timestep as output
rnn = RNN(input_size, hidden_size, num_layers, num_classes)
# In[ ]:
# In[23]:
# Loss and Optimizer
#criterion = nn.NLLLoss()
# CrossEntropyLoss applies log-softmax internally, so the network is
# expected to emit raw (unnormalized) class scores — which RNN.forward does.
criterion = nn.CrossEntropyLoss()
# Adam over every learnable parameter of the LSTM + final linear layer.
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
# In[32]:
# Load the two training clips and turn each into an MFCC feature matrix
# of shape (frames, 13).
mfcc1 = audio_to_mfcc('/home/saurabh/Documents/audio_classification/data/lizzie.wav')
mfcc2 = audio_to_mfcc('/home/saurabh/Documents/audio_classification/data/boy.wav')
# Insert a singleton middle axis: (frames, 13) -> (frames, 1, 13).  With
# batch_first=True the LSTM then treats every MFCC frame as its own
# length-1 sequence.
input_var = Variable(torch.Tensor(np.expand_dims(mfcc1, 1)))
input2_var = Variable(torch.Tensor(np.expand_dims(mfcc2, 1)))
# Training loop: one optimizer update per clip per epoch.
for epoch in range(num_epochs):
    # Forward pass: (frames, 1, 13) -> (frames, num_classes); each row is
    # the class-score vector for one MFCC frame.
    outputs = rnn(input_var)
    outputs2 = rnn(input2_var)

    # BUG FIX: the original wrapped outputs.data[999] in a brand-new
    # Variable(..., requires_grad=True), which detaches the value from the
    # autograd graph — loss.backward() then produced gradients only for
    # that throwaway Variable, never for the LSTM/linear weights, so the
    # optimizer steps were no-ops and the network never trained.  Slicing
    # the live output keeps the graph intact.  Index -1 (last frame) also
    # replaces the hard-coded 999/998, which assumed fixed clip lengths.
    final_output = outputs[-1].unsqueeze(0)    # shape (1, num_classes)
    final_output2 = outputs2[-1].unsqueeze(0)

    label = Variable(torch.LongTensor([0]))    # clip 1 -> class 0
    label2 = Variable(torch.LongTensor([1]))   # clip 2 -> class 1

    # Update on the first clip.
    loss = criterion(final_output, label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.data[0])

    # Update on the second clip.
    loss2 = criterion(final_output2, label2)
    optimizer.zero_grad()
    loss2.backward()
    optimizer.step()
    print(loss2.data[0])
# In[ ]:
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment