@erichooi
Created August 28, 2018 03:46
Implementation of Backpropagation for an LSTM
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# input-to-gate weights for the candidate (a), input (i),
# forget (f) and output (o) gates
Wa = np.array([0.45, 0.25]).reshape(1, 2)
Wi = np.array([0.95, 0.8]).reshape(1, 2)
Wf = np.array([0.7, 0.45]).reshape(1, 2)
Wo = np.array([0.6, 0.4]).reshape(1, 2)
# recurrent (previous-output) weights
Ua = np.array(0.15).reshape(1, 1)
Ui = np.array(0.8).reshape(1, 1)
Uf = np.array(0.1).reshape(1, 1)
Uo = np.array(0.25).reshape(1, 1)
# gate biases
ba = np.array(0.2).reshape(1, 1)
bi = np.array(0.65).reshape(1, 1)
bf = np.array(0.15).reshape(1, 1)
bo = np.array(0.1).reshape(1, 1)
# stack all the weights and biases, one row per gate in (a, i, f, o) order
W = np.vstack((Wa, Wi, Wf, Wo))
U = np.vstack((Ua, Ui, Uf, Uo))
b = np.vstack((ba, bi, bf, bo))
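# sanity check (my addition, not in the original gist): one hidden unit and a
# two-dimensional input give one row per gate in the stacked parameters
assert W.shape == (4, 2) and U.shape == (4, 1) and b.shape == (4, 1)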
# initial cell state and hidden output (both zero)
state_minus_1 = np.array(0).reshape(1, 1)
output_minus_1 = np.array(0).reshape(1, 1)
# inputs and targets for the two timesteps
x0 = np.array([1, 2]).reshape(2, 1)
x1 = np.array([0.5, 3]).reshape(2, 1)
y0 = np.array(0.5).reshape(1, 1)
y1 = np.array(1.25).reshape(1, 1)
# forward prop, timestep 0
a0 = np.tanh(np.matmul(Wa, x0) + np.matmul(Ua, output_minus_1) + ba)  # candidate
i0 = sigmoid(np.matmul(Wi, x0) + np.matmul(Ui, output_minus_1) + bi)  # input gate
f0 = sigmoid(np.matmul(Wf, x0) + np.matmul(Uf, output_minus_1) + bf)  # forget gate
o0 = sigmoid(np.matmul(Wo, x0) + np.matmul(Uo, output_minus_1) + bo)  # output gate
# the state/output updates are element-wise (Hadamard) products; on 1x1 arrays
# np.matmul happens to coincide, but * states the intent
state_0 = f0 * state_minus_1 + a0 * i0
output_0 = o0 * np.tanh(state_0)
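# checkpoint (my addition): with these hand-picked parameters the first
# hidden output should come out around 0.5363
print("output_0:", output_0)  # expect roughly [[0.5363]]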
# forward prop, timestep 1
a1 = np.tanh(np.matmul(Wa, x1) + np.matmul(Ua, output_0) + ba)
i1 = sigmoid(np.matmul(Wi, x1) + np.matmul(Ui, output_0) + bi)
f1 = sigmoid(np.matmul(Wf, x1) + np.matmul(Uf, output_0) + bf)
o1 = sigmoid(np.matmul(Wo, x1) + np.matmul(Uo, output_0) + bo)
state_1 = f1 * state_0 + a1 * i1
output_1 = o1 * np.tanh(state_1)
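# checkpoint (my addition): expect output_1 around 0.7720, still well short
# of the target y1 = 1.25
print("output_1:", output_1)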
# backward prop, timestep 1
# there is no timestep 2, so the future cell-state gradient and forget gate are zero
dstate_2 = np.array(0).reshape(1, 1)
f2 = np.array(0).reshape(1, 1)
different_error_1 = output_1 - y1  # difference from the target
different_output_1 = 0  # gradient arriving from the future output (none at the last step)
doutput_1 = different_error_1 + different_output_1
# all products below are element-wise on 1x1 arrays; each gate gradient is
# taken w.r.t. its pre-activation, hence the sigmoid/tanh derivative factors
dstate_1 = doutput_1 * o1 * (1 - np.square(np.tanh(state_1))) + dstate_2 * f2
da1 = dstate_1 * i1 * (1 - np.square(a1))
di1 = dstate_1 * a1 * i1 * (1 - i1)
df1 = dstate_1 * state_0 * f1 * (1 - f1)
do1 = doutput_1 * np.tanh(state_1) * o1 * (1 - o1)
dgates1 = np.vstack((da1, di1, df1, do1))  # same (a, i, f, o) row order as W
dx1 = W.T.dot(dgates1)  # gradient w.r.t. the input x1
different_output_0 = U.T.dot(dgates1)  # recurrent gradient carried back to output_0
# backward prop, timestep 0
different_error_0 = output_0 - y0  # difference from the target
doutput_0 = different_error_0 + different_output_0  # plus the recurrent term from t = 1
dstate_0 = doutput_0 * o0 * (1 - np.square(np.tanh(state_0))) + dstate_1 * f1
da0 = dstate_0 * i0 * (1 - np.square(a0))
di0 = dstate_0 * a0 * i0 * (1 - i0)
df0 = dstate_0 * state_minus_1 * f0 * (1 - f0)
do0 = doutput_0 * np.tanh(state_0) * o0 * (1 - o0)
dgates0 = np.vstack((da0, di0, df0, do0))
dx0 = W.T.dot(dgates0)
different_output_minus_1 = U.T.dot(dgates0)
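# the input gradients match the input shape (2, 1); printing them is a cheap
# way to eyeball the backward pass (my addition, not in the original gist)
print("dx0:", dx0.ravel(), "dx1:", dx1.ravel())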
# accumulate the parameter gradients over both timesteps
dW = dgates0.dot(x0.T) + dgates1.dot(x1.T)
dU = dgates1.dot(output_0)  # the t = 0 term drops out because output_minus_1 is zero
db = dgates0 + dgates1
# vanilla SGD update with learning rate 0.1
W_new = W - 0.1 * dW
U_new = U - 0.1 * dU
b_new = b - 0.1 * db
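# --- optional sanity checks (my additions, not part of the original gist) ---
# a minimal re-implementation of one forward step as a function, so the
# analytic gradients above can be compared against finite differences; the
# helper names forward/total_loss are assumptions of this sketch
def forward(W_, U_, b_, x, h_prev, c_prev):
    a = np.tanh(W_[0:1].dot(x) + U_[0:1].dot(h_prev) + b_[0:1])
    i = sigmoid(W_[1:2].dot(x) + U_[1:2].dot(h_prev) + b_[1:2])
    f = sigmoid(W_[2:3].dot(x) + U_[2:3].dot(h_prev) + b_[2:3])
    o = sigmoid(W_[3:4].dot(x) + U_[3:4].dot(h_prev) + b_[3:4])
    c = f * c_prev + a * i
    return o * np.tanh(c), c

def total_loss(W_, U_, b_):
    # the squared-error loss implied by doutput = output - y above
    h0, c0 = forward(W_, U_, b_, x0, output_minus_1, state_minus_1)
    h1, _ = forward(W_, U_, b_, x1, h0, c0)
    return (0.5 * ((h0 - y0) ** 2 + (h1 - y1) ** 2)).item()

# central finite-difference check of dW (the same pattern works for dU and db)
eps = 1e-6
num_dW = np.zeros_like(W)
for r in range(W.shape[0]):
    for col in range(W.shape[1]):
        Wp, Wm = W.copy(), W.copy()
        Wp[r, col] += eps
        Wm[r, col] -= eps
        num_dW[r, col] = (total_loss(Wp, U, b) - total_loss(Wm, U, b)) / (2 * eps)
print("max |num_dW - dW|:", np.max(np.abs(num_dW - dW)))  # should be tiny, ~1e-9

# one SGD step should nudge the loss down
print("loss before:", total_loss(W, U, b))
print("loss after: ", total_loss(W_new, U_new, b_new))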