# Base code from: https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/
import copy
import numpy as np

np.random.seed(1)


def sigmoid(x):
    output = 1 / (1 + np.exp(-x))
    return output


def sigmoid_output_to_derivative(output):
    return output * (1 - output)


binary_dim = 16
largest_number = pow(2, binary_dim)

# Lookup table mapping each integer to its binary_dim-bit representation.
int2binary = {
    x[0]: np.array([
        int(x) for x in list(bin(x[0]).replace('0b', '').zfill(binary_dim))
    ])
    for x in np.array([range(largest_number)], dtype=np.uint16).T
}

# binary_dim = 8
# binary = np.unpackbits(
#     np.array([range(largest_number)], dtype=np.uint16).T, axis=1)
# for i in range(largest_number):
#     int2binary[i] = binary[i]

alpha = 0.1
input_dim = 2
hidden_dim = 10
output_dim = 1


def predict(session, a_int, b_int):
    train(session, times=1, a_int=a_int, b_int=b_int)
    return session


def train(session, times=10000, a_int=None, b_int=None):
    synapse_0 = session[0] if session[0].any() else 2 * np.random.random(
        (input_dim, hidden_dim)
    ) - 1
    synapse_1 = session[1] if session[1].any() else 2 * np.random.random(
        (hidden_dim, output_dim)
    ) - 1
    synapse_h = session[2] if session[2].any() else 2 * np.random.random(
        (hidden_dim, hidden_dim)
    ) - 1

    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)

    for j in range(times):
        # Pick two random operands whose sum still fits in binary_dim bits.
        a_int = a_int if a_int is not None else \
            np.random.randint(largest_number // 2)
        a = int2binary[a_int]
        b_int = b_int if b_int is not None else \
            np.random.randint(largest_number // 2)
        b = int2binary[b_int]
        c_int = a_int + b_int
        c = int2binary[c_int]

        d = np.zeros_like(c)
        overall_error = 0
        layer_2_deltas = []
        layer_1_values = []
        layer_1_values.append(np.zeros(hidden_dim))

        # Forward pass, from the least significant bit to the most significant.
        for position in range(binary_dim):
            X = np.array([[
                a[binary_dim - position - 1],
                b[binary_dim - position - 1]
            ]])
            y = np.array([[
                c[binary_dim - position - 1]
            ]]).T
            layer_1 = sigmoid(
                np.dot(X, synapse_0) +
                np.dot(layer_1_values[-1], synapse_h)
            )
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))
            layer_2_error = y - layer_2
            layer_2_deltas.append(
                layer_2_error * sigmoid_output_to_derivative(layer_2)
            )
            overall_error += np.abs(layer_2_error[0])
            d[binary_dim - position - 1] = np.round(layer_2[0][0])
            layer_1_values.append(copy.deepcopy(layer_1))

        future_layer_1_delta = np.zeros(hidden_dim)

        # Backpropagation through time.
        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]
            layer_2_delta = layer_2_deltas[-position - 1]
            layer_1_delta = (
                future_layer_1_delta.dot(synapse_h.T) +
                layer_2_delta.dot(synapse_1.T)
            ) * sigmoid_output_to_derivative(layer_1)
            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T \
                .dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)
            future_layer_1_delta = layer_1_delta

        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha
        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0

        if j % 1000 == 0:
            print('Error:', overall_error)
            print('Pred:', d)
            print('True:', c)
            out = 0
            for i, x in enumerate(reversed(d)):
                out += x * pow(2, i)
            print('%s + %s = %s' % (a_int, b_int, out))
            print('------------')

        a_int = None
        b_int = None

    return synapse_0, synapse_1, synapse_h
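

# A minimal usage sketch, assuming a "session" starts out as three zero
# matrices of the right shapes (so `.any()` is False and train() draws fresh
# random weights on the first call):
if __name__ == '__main__':
    session = (
        np.zeros((input_dim, hidden_dim)),
        np.zeros((hidden_dim, output_dim)),
        np.zeros((hidden_dim, hidden_dim)),
    )
    session = train(session)       # ~10000 random addition examples
    predict(session, 1234, 4321)   # one more step; prints the sum 1234 + 4321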
# Base code from: https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/
import copy
import numpy as np

np.random.seed(1)


def sigmoid(x):
    output = 1 / (1 + np.exp(-x))
    return output


def sigmoid_output_to_derivative(output):
    return output * (1 - output)


binary_dim = 16
largest_number = pow(2, binary_dim)

# Lookup table mapping each integer to its binary_dim-bit representation.
int2binary = {
    x[0]: np.array([
        int(x) for x in list(bin(x[0]).replace('0b', '').zfill(binary_dim))
    ])
    for x in np.array([range(largest_number)], dtype=np.uint16).T
}

# binary_dim = 8
# binary = np.unpackbits(
#     np.array([range(largest_number)], dtype=np.uint16).T, axis=1)
# for i in range(largest_number):
#     int2binary[i] = binary[i]

alpha = 0.1
input_dim = 2
hidden_dim = 10
output_dim = 1


def predict(session, a_int, b_int):
    train(session, times=1, a_int=a_int, b_int=b_int)
    return session


def train(session, times=10000, a_int=None, b_int=None):
    synapse_0 = session[0] if session[0].any() else 2 * np.random.random(
        (input_dim, hidden_dim)
    ) - 1
    synapse_1 = session[1] if session[1].any() else 2 * np.random.random(
        (hidden_dim, output_dim)
    ) - 1
    synapse_h = session[2] if session[2].any() else 2 * np.random.random(
        (hidden_dim, hidden_dim)
    ) - 1

    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)

    for j in range(times):
        # Pick a random minuend (at least 1) and a smaller subtrahend, so the
        # difference is always non-negative.
        a_int = a_int if a_int is not None else \
            np.random.randint(1, largest_number)
        a = int2binary[a_int]
        b_int = b_int if b_int is not None else \
            np.random.randint(a_int)
        b = int2binary[b_int]
        c_int = a_int - b_int
        c = int2binary[c_int]

        d = np.zeros_like(c)
        overall_error = 0
        layer_2_deltas = []
        layer_1_values = []
        layer_1_values.append(np.zeros(hidden_dim))

        # Forward pass, from the least significant bit to the most significant.
        for position in range(binary_dim):
            X = np.array([[
                a[binary_dim - position - 1],
                b[binary_dim - position - 1]
            ]])
            y = np.array([[
                c[binary_dim - position - 1]
            ]]).T
            layer_1 = sigmoid(
                np.dot(X, synapse_0) +
                np.dot(layer_1_values[-1], synapse_h)
            )
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))
            layer_2_error = y - layer_2
            layer_2_deltas.append(
                layer_2_error * sigmoid_output_to_derivative(layer_2)
            )
            overall_error += np.abs(layer_2_error[0])
            d[binary_dim - position - 1] = np.round(layer_2[0][0])
            layer_1_values.append(copy.deepcopy(layer_1))

        future_layer_1_delta = np.zeros(hidden_dim)

        # Backpropagation through time.
        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]
            layer_2_delta = layer_2_deltas[-position - 1]
            layer_1_delta = (
                future_layer_1_delta.dot(synapse_h.T) +
                layer_2_delta.dot(synapse_1.T)
            ) * sigmoid_output_to_derivative(layer_1)
            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T \
                .dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)
            future_layer_1_delta = layer_1_delta

        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha
        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0

        if j % 1000 == 0:
            print('Error:', overall_error)
            print('Pred:', d)
            print('True:', c)
            out = 0
            for i, x in enumerate(reversed(d)):
                out += x * pow(2, i)
            print('%s - %s = %s' % (a_int, b_int, out))
            print('------------')

        a_int = None
        b_int = None

    return synapse_0, synapse_1, synapse_h
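

# The same driver sketch for the subtraction variant, under the same
# zero-initialized session assumption as the addition example above:
if __name__ == '__main__':
    session = train((
        np.zeros((input_dim, hidden_dim)),
        np.zeros((hidden_dim, output_dim)),
        np.zeros((hidden_dim, hidden_dim)),
    ))
    predict(session, 4321, 1234)   # one more step; prints 4321 - 1234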
from keras.layers import Dense
from keras.models import Sequential
from sklearn.metrics import confusion_matrix
import numpy as np

# fix random seed for reproducibility
seed = 0
np.random.seed(seed)

# load pima indians dataset
dataset = np.loadtxt("datasets/pima-indians-diabetes.data.csv", delimiter=",")

# split into input (X) and output (y) variables
X = dataset[:, 0:8]
y = dataset[:, 8]

# 70/30 train/test split
split = round(len(y) * .7)
y0 = y[:split]
y1 = y[split:]
X0 = X[:split]
X1 = X[split:]

model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='random_uniform',
                activation='relu'))
model.add(Dense(8, kernel_initializer='random_uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='random_uniform', activation='sigmoid'))
model.compile(
    loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit on the training split only, so X1/y1 stay unseen
model.fit(X0, y0, epochs=150, batch_size=10, verbose=0)

# accuracy on the training split
scores = model.evaluate(X0, y0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

# accuracy and confusion matrix on the held-out split
predictions = [round(x[0]) for x in model.predict(X1)]
t = 0
f = 0
for i, prediction in enumerate(predictions):
    if prediction == y1[i]:
        t += 1
    else:
        f += 1
accuracy = t / (t + f)
cf_matrix = confusion_matrix(y1, predictions)
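
# A short reporting sketch for the quantities computed above, assuming the
# 70/30 split is meant as train/test (X1/y1 held out):
print('Held-out accuracy: %.2f%%' % (accuracy * 100))
print('Confusion matrix (rows: true class, columns: predicted class):')
print(cf_matrix)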
import numpy as np
from sklearn import datasets

np.random.seed(0)
X, y = datasets.make_moons(2000, noise=0.20)

num_examples = len(X)  # training set size
nn_input_dim = 2  # input layer dimensionality
nn_output_dim = 2  # output layer dimensionality

# Gradient descent parameters (I picked these by hand)
epsilon = 0.0005  # learning rate for gradient descent
reg_lambda = 0.0001  # regularization strength


# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the loss
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (optional)
    data_loss += reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1. / num_examples * data_loss


# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)


# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(nn_hdim, num_passes=200000, print_loss=False):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end
    model = {}

    # Gradient descent. For each batch...
    for i in range(num_passes):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Backpropagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # Optionally print the loss.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))

    return model
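

# A minimal usage sketch, assuming the module-level make_moons data (X, y) and
# the hand-picked epsilon/reg_lambda above:
if __name__ == '__main__':
    model = build_model(nn_hdim=3, print_loss=True)
    train_accuracy = np.mean(predict(model, X) == y)
    print('Training accuracy: %.3f' % train_accuracy)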