RNN LSTM Music Generation
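A character-RNN-style LSTM implemented from scratch in NumPy, trained with Adagrad to jointly predict the next note, its duration, and its offset. The music21 preprocessing step is stubbed out here: the training streams are random placeholder data with the same shape a parsed MIDI corpus would produce.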
import numpy as np
# The imports below belong to the full music21 preprocessing pipeline; they
# are unused in this self-contained demo, which substitutes random data.
# from glob import glob
# from music21 import converter, instrument, note, chord, stream
# import subprocess, os.path
# Placeholder training data: random note indices, duration indices, and offsets.
data = np.random.randint(0, 251, 32427)
durations = np.random.randint(0, 9, 32427)
offsets = [round(np.random.uniform(0, .75), 2) for i in range(32427)]
seq_length = 40
vocab_size = len(set(data))
pitchnames = sorted(set(data))
unique_durations = sorted(set(durations))
unique_offsets = sorted(set(offsets))
num_durations = len(unique_durations)
num_offsets = len(unique_offsets)
# Lookup tables between symbols and integer indices, in both directions.
note_num = {ch: i for i, ch in enumerate(pitchnames)}
num_note = {i: ch for i, ch in enumerate(pitchnames)}
dur_num = {ch: i for i, ch in enumerate(unique_durations)}
num_dur = {i: ch for i, ch in enumerate(unique_durations)}
off_num = {ch: i for i, ch in enumerate(unique_offsets)}
num_off = {i: ch for i, ch in enumerate(unique_offsets)}
data_size = len(data)
hidden_size = 150
# Width of the concatenated LSTM input [a_prev; x], where x stacks the
# note, duration, and offset one-hot vectors.
Z = vocab_size + hidden_size + num_durations + num_offsets
class Param:
    def __init__(self, name, value):
        self.name = name
        self.w = value                  # weights
        self.g = np.zeros_like(value)   # gradient accumulated over the current sequence
        self.m = np.zeros_like(value)   # Adagrad cache (running sum of squared gradients)
        self.r = np.zeros_like(value)   # unused; reserved for other optimizers
class Parameters:
    def __init__(self):
        # Gate weights act on the concatenated [a_prev; x] vector of width Z.
        self.wf = Param('wf', np.random.randn(hidden_size, Z) / np.sqrt(Z / 2))
        self.wu = Param('wu', np.random.randn(hidden_size, Z) / np.sqrt(Z / 2))
        self.wc = Param('wc', np.random.randn(hidden_size, Z) / np.sqrt(Z / 2))
        self.wo = Param('wo', np.random.randn(hidden_size, Z) / np.sqrt(Z / 2))
        # The output layer's fan-in is hidden_size, so scale by it (not vocab_size).
        self.wy = Param('wy', np.random.randn(vocab_size + num_durations + num_offsets, hidden_size)
                              / np.sqrt(hidden_size / 2))
        self.bu = Param('bu', np.zeros((hidden_size, 1)))
        self.bc = Param('bc', np.zeros((hidden_size, 1)))
        self.bo = Param('bo', np.zeros((hidden_size, 1)))
        self.bf = Param('bf', np.zeros((hidden_size, 1)))
        self.by = Param('by', np.zeros((vocab_size + num_durations + num_offsets, 1)))

    def all(self):
        return [self.wf, self.wu, self.wc, self.wo, self.wy,
                self.bu, self.bc, self.bo, self.bf, self.by]

params = Parameters()
def tanh(z):
    return np.tanh(z)

def softmax(z):
    # Shift by the max for numerical stability before exponentiating.
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoidGradient(z):
    # z is the sigmoid *output*, so the derivative is z * (1 - z).
    return z * (1 - z)

def tanhGradient(z):
    # z is the tanh *output*, so the derivative is 1 - z**2.
    return 1 - z * z
def clear_grad(p=params):
    for param in p.all():
        param.g.fill(0)

def clip_grad(p=params):
    for param in p.all():
        np.clip(param.g, -1, 1, out=param.g)

def cross_entropy(probs, label):
    return -np.sum(label * np.log(probs + 1e-12))  # epsilon guards against log(0)
def forward(x, a_old, c_old, p=params):
    # Stack the previous hidden state on top of the current input. (The
    # original cast X to int here, which truncated the real-valued hidden
    # state to zeros and broke the recurrence.)
    X = np.vstack((a_old, x))
    hf = sigmoid(np.dot(p.wf.w, X) + p.bf.w)    # forget gate
    hu = sigmoid(np.dot(p.wu.w, X) + p.bu.w)    # update (input) gate
    ho = sigmoid(np.dot(p.wo.w, X) + p.bo.w)    # output gate
    c_temp = tanh(np.dot(p.wc.w, X) + p.bc.w)   # candidate cell state
    c = hu * c_temp + hf * c_old
    a = ho * tanh(c)
    y = np.dot(p.wy.w, a) + p.by.w
    prob = softmax(y)
    return X, hf, hu, c_temp, c, ho, a, y, prob
def backward(cache, a_next, c_next, prob, target, p=params):
    c_temp, hf, hu, ho, c, a, X, c_old = cache
    # Gradient of softmax + cross-entropy with respect to the logits.
    dy = prob - target
    p.wy.g += np.dot(dy, a.T)
    p.by.g += dy
    dh = np.dot(p.wy.w.T, dy)
    dh += a_next                    # gradient flowing back from the next timestep
    dho = dh * tanh(c)
    dho = sigmoidGradient(ho) * dho
    p.wo.g += np.dot(dho, X.T)
    p.bo.g += dho
    dc = np.copy(c_next)
    dc += dh * ho * tanhGradient(tanh(c))
    dc_temp = hu * dc
    dc_temp = tanhGradient(c_temp) * dc_temp
    p.wc.g += np.dot(dc_temp, X.T)
    p.bc.g += dc_temp
    dhu = c_temp * dc
    dhu = sigmoidGradient(hu) * dhu
    p.wu.g += np.dot(dhu, X.T)
    p.bu.g += dhu
    dhf = c_old * dc
    dhf = sigmoidGradient(hf) * dhf
    p.wf.g += np.dot(dhf, X.T)
    p.bf.g += dhf
    dXf = np.dot(p.wf.w.T, dhf)
    dXu = np.dot(p.wu.w.T, dhu)
    dXo = np.dot(p.wo.w.T, dho)
    dXc = np.dot(p.wc.w.T, dc_temp)
    dX = dXo + dXc + dXu + dXf
    a_next = dX[:hidden_size, :]    # gradient w.r.t. the previous hidden state
    c_next = hf * dc                # gradient w.r.t. the previous cell state
    return a_next, c_next
def forward_backward(inputs, targets, a_prev, c_prev):
    global params
    x_t, X_t, hf_t, hu_t = {}, {}, {}, {}
    c_temp_t, c_t, ho_t, a_t = {}, {}, {}, {}
    y_t, prob_t = {}, {}
    a_t[-1] = np.copy(a_prev)
    c_t[-1] = np.copy(c_prev)
    loss = 0
    # Forward pass through the whole sequence, caching activations per step.
    for t in range(len(inputs)):
        x_t[t] = inputs[t]
        X_t[t], hf_t[t], hu_t[t], c_temp_t[t], c_t[t], ho_t[t], a_t[t], y_t[t], prob_t[t] = forward(x_t[t], a_t[t - 1], c_t[t - 1])
        loss += cross_entropy(prob_t[t], targets[t])
    clear_grad()
    # Backward pass through time, accumulating gradients.
    a_next, c_next = np.zeros_like(a_t[0]), np.zeros_like(c_t[0])
    for t in reversed(range(len(inputs))):
        cache = (c_temp_t[t], hf_t[t], hu_t[t], ho_t[t], c_t[t], a_t[t], X_t[t], c_t[t - 1])
        a_next, c_next = backward(cache, a_next, c_next, prob_t[t], targets[t], p=params)
    clip_grad()
    return loss, a_t[len(inputs) - 1], c_t[len(inputs) - 1]
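# Optional sanity check (not in the original gist): compare the analytic
# gradient from backward() against a centered finite difference on a single
# weight entry. The name grad_check and the choice of wf[0, 0] are arbitrary.
# Note that clip_grad() inside forward_backward() clips gradients to [-1, 1],
# so the comparison is only meaningful while the analytic value stays inside
# that range.
def grad_check(inputs, targets, eps=1e-5):
    a0, c0 = np.zeros((hidden_size, 1)), np.zeros((hidden_size, 1))
    forward_backward(inputs, targets, a0, c0)     # fills params.*.g
    analytic = params.wf.g[0, 0]
    original = params.wf.w[0, 0]
    params.wf.w[0, 0] = original + eps
    loss_plus, _, _ = forward_backward(inputs, targets, a0, c0)
    params.wf.w[0, 0] = original - eps
    loss_minus, _, _ = forward_backward(inputs, targets, a0, c0)
    params.wf.w[0, 0] = original                  # restore the weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print('analytic %.6e vs numeric %.6e' % (analytic, numeric))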
def sample(sequence_length, strict=False):
    # Generate a sequence of (note, duration, offset) index triples by
    # feeding the model's own predictions back in as input. The input must
    # have the full stacked width, not just vocab_size, to match forward().
    a = np.zeros((hidden_size, 1))
    c = np.zeros((hidden_size, 1))
    input_size = vocab_size + num_durations + num_offsets
    x = np.zeros((input_size, 1))
    x[np.random.randint(0, vocab_size)] = 1   # seed with a random note
    indexes = []
    for i in range(sequence_length):
        _, _, _, _, c, _, a, _, prob = forward(x, a, c)
        # The output stacks three softmax segments: notes, durations, offsets.
        note_p = prob[:vocab_size].flatten()
        dur_p = prob[vocab_size:vocab_size + num_durations].flatten()
        off_p = prob[vocab_size + num_durations:].flatten()
        if strict:
            note_i, dur_i, off_i = np.argmax(note_p), np.argmax(dur_p), np.argmax(off_p)
        else:
            # Renormalise each segment so it is a valid distribution on its own.
            note_i = np.random.choice(vocab_size, p=note_p / note_p.sum())
            dur_i = np.random.choice(num_durations, p=dur_p / dur_p.sum())
            off_i = np.random.choice(num_offsets, p=off_p / off_p.sum())
        x = np.zeros((input_size, 1))
        x[note_i] = 1
        x[vocab_size + dur_i] = 1
        x[vocab_size + num_durations + off_i] = 1
        indexes.append((note_i, dur_i, off_i))
    return indexes
def update_params(p=params):
    # Adagrad: per-parameter step size scaled by accumulated squared gradients.
    for param in p.all():
        param.m += param.g * param.g
        param.w += -(learning_rate * param.g / np.sqrt(param.m + 1e-8))
def update_progress(inputs, a_prev, c_prev):
    print('Iteration: ' + str(iteration) + ', loss = ' + str(smooth_loss))
learning_rate = 0.1
p = 0           # position of the current training window within the data
iteration = 0
# Initial loss estimate: uniform prediction over the note vocabulary.
smooth_loss = -np.log(1.0 / vocab_size) * seq_length

def reset():
    global learning_rate, p, iteration
    learning_rate = 0.1
    p = 0
    iteration = 0
def train(update_length):
    global p, iteration, smooth_loss
    while True:   # runs until interrupted
        # Reset the LSTM state at the start of each pass over the data.
        if p + seq_length >= data_size or iteration == 0:
            a_prev = np.zeros((hidden_size, 1))
            c_prev = np.zeros((hidden_size, 1))
            p = 0
        # Build one-hot columns for the notes, durations, and offsets in the window.
        input_data = [note_num[char] for char in data[p:p + seq_length]]
        input_temp = [np.zeros((vocab_size, 1)) for i in range(seq_length)]
        input_durations = [dur_num[char] for char in durations[p:p + seq_length]]
        input_durations_t = [np.zeros((num_durations, 1)) for i in range(seq_length)]
        input_offset = [off_num[char] for char in offsets[p:p + seq_length]]
        input_offset_t = [np.zeros((num_offsets, 1)) for i in range(seq_length)]
        for i in range(seq_length):
            input_temp[i][input_data[i]] = 1
            input_durations_t[i][input_durations[i]] = 1
            input_offset_t[i][input_offset[i]] = 1
        # Stack the three one-hot vectors into a single input column per step.
        inputs = [np.vstack((input_temp[i], input_durations_t[i], input_offset_t[i]))
                  for i in range(seq_length)]
        # Targets are the same streams shifted one step forward.
        target_data = [note_num[char] for char in data[p + 1:p + 1 + seq_length]]
        target_temp = [np.zeros((vocab_size, 1)) for i in range(seq_length)]
        target_durations = [dur_num[char] for char in durations[p + 1:p + 1 + seq_length]]
        target_durations_t = [np.zeros((num_durations, 1)) for i in range(seq_length)]
        target_offset = [off_num[char] for char in offsets[p + 1:p + 1 + seq_length]]
        target_offset_t = [np.zeros((num_offsets, 1)) for i in range(seq_length)]
        for i in range(seq_length):
            target_temp[i][target_data[i]] = 1
            target_durations_t[i][target_durations[i]] = 1
            target_offset_t[i][target_offset[i]] = 1
        targets = [np.vstack((target_temp[i], target_durations_t[i], target_offset_t[i]))
                   for i in range(seq_length)]
        loss, a_prev, c_prev = forward_backward(inputs, targets, a_prev, c_prev)
        smooth_loss = smooth_loss * 0.999 + loss * 0.001
        if iteration % update_length == 0:
            update_progress(inputs, a_prev, c_prev)
        update_params()
        p += seq_length
        iteration += 1
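# Hypothetical decoding step (not in the original gist): turn sampled
# (note, duration, offset) index triples back into a music21 stream and write
# a MIDI file. With the random placeholder data above, num_note maps to
# arbitrary integers, so they are folded into the MIDI pitch range here; with
# a real corpus, num_note would map back to actual pitches.
def indexes_to_midi(triples, out_path='sample.mid'):
    from music21 import note, stream
    s = stream.Stream()
    position = 0.0
    for note_i, dur_i, off_i in triples:
        n = note.Note()
        n.pitch.midi = int(num_note[note_i]) % 128    # assumption: index -> pitch
        n.quarterLength = max(float(num_dur[dur_i]), 0.25)
        position += float(num_off[off_i])             # offsets treated as deltas
        s.insert(position, n)
    s.write('midi', fp=out_path)

# Example usage after interrupting training:
#   indexes_to_midi(sample(100))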
train(10)   # report progress every 10 iterations; interrupt to stop