# Base code from: https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/
import copy

import numpy as np

np.random.seed(1)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_output_to_derivative(output):
    return output * (1 - output)
# Lookup table mapping each integer to its binary_dim-bit representation.
binary_dim = 16
largest_number = pow(2, binary_dim)
int2binary = {
    i: np.array([int(bit) for bit in bin(i)[2:].zfill(binary_dim)])
    for i in range(largest_number)
}

# Alternative table via np.unpackbits (binary_dim = 8 only, since
# unpackbits needs a uint8 array):
# binary_dim = 8
# binary = np.unpackbits(
#     np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
# for i in range(largest_number):
#     int2binary[i] = binary[i]
alpha = 0.1       # learning rate
input_dim = 2     # two input bits per time step (one from each addend)
hidden_dim = 10
output_dim = 1    # one predicted sum bit per time step


def predict(session, a_int, b_int):
    # A single training step on a fixed pair also prints its prediction;
    # the weight arrays inside `session` are updated in place.
    train(session, times=1, a_int=a_int, b_int=b_int)
    return session
def train(session, times=10000, a_int=None, b_int=None):
    # Reuse the weights stored in `session` if any are nonzero; otherwise
    # initialize them uniformly in [-1, 1).
    synapse_0 = session[0] if session[0].any() else 2 * np.random.random(
        (input_dim, hidden_dim)
    ) - 1
    synapse_1 = session[1] if session[1].any() else 2 * np.random.random(
        (hidden_dim, output_dim)
    ) - 1
    synapse_h = session[2] if session[2].any() else 2 * np.random.random(
        (hidden_dim, hidden_dim)
    ) - 1

    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)

    for j in range(times):
        # Sample two addends below largest_number // 2 so their sum still
        # fits in binary_dim bits.
        a_int = a_int if a_int is not None else \
            np.random.randint(largest_number // 2)
        a = int2binary[a_int]
        b_int = b_int if b_int is not None else \
            np.random.randint(largest_number // 2)
        b = int2binary[b_int]

        c_int = a_int + b_int
        c = int2binary[c_int]
        d = np.zeros_like(c)  # the network's predicted bits

        overall_error = 0
        layer_2_deltas = []
        layer_1_values = [np.zeros(hidden_dim)]

        # Forward pass, least significant bit first.
        for position in range(binary_dim):
            X = np.array([[
                a[binary_dim - position - 1],
                b[binary_dim - position - 1]
            ]])
            y = np.array([[
                c[binary_dim - position - 1]
            ]]).T

            layer_1 = sigmoid(
                np.dot(X, synapse_0) +
                np.dot(layer_1_values[-1], synapse_h)
            )
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))

            layer_2_error = y - layer_2
            layer_2_deltas.append(
                layer_2_error * sigmoid_output_to_derivative(layer_2)
            )
            overall_error += np.abs(layer_2_error[0])

            d[binary_dim - position - 1] = np.round(layer_2[0][0])
            layer_1_values.append(copy.deepcopy(layer_1))

        # Backpropagation through time, most significant bit first.
        future_layer_1_delta = np.zeros(hidden_dim)
        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]
            layer_2_delta = layer_2_deltas[-position - 1]
            layer_1_delta = (
                future_layer_1_delta.dot(synapse_h.T) +
                layer_2_delta.dot(synapse_1.T)
            ) * sigmoid_output_to_derivative(layer_1)

            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T \
                .dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)

            future_layer_1_delta = layer_1_delta

        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha

        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0

        if j % 1000 == 0:
            print('Error:', overall_error)
            print('Pred:', d)
            print('True:', c)
            out = 0
            for i, x in enumerate(reversed(d)):
                out += x * pow(2, i)
            print('%s + %s = %s' % (a_int, b_int, out))
            print('------------')

        a_int = None
        b_int = None

    return synapse_0, synapse_1, synapse_h
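
# A minimal usage sketch (assumed; not part of the original file): an
# all-zero "session" makes train() fall back to random initialization,
# and the returned weights can be fed back in for a one-step prediction.
if __name__ == '__main__':
    session = (
        np.zeros((input_dim, hidden_dim)),
        np.zeros((hidden_dim, output_dim)),
        np.zeros((hidden_dim, hidden_dim)),
    )
    session = train(session, times=10000)
    predict(session, 1234, 4321)  # prints the network's prediction for 1234 + 4321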
# Variant of the network above, trained on binary subtraction instead of
# addition.
# Base code from: https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/
import copy

import numpy as np

np.random.seed(1)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_output_to_derivative(output):
    return output * (1 - output)
# Lookup table mapping each integer to its binary_dim-bit representation.
binary_dim = 16
largest_number = pow(2, binary_dim)
int2binary = {
    i: np.array([int(bit) for bit in bin(i)[2:].zfill(binary_dim)])
    for i in range(largest_number)
}

# Alternative table via np.unpackbits (binary_dim = 8 only, since
# unpackbits needs a uint8 array):
# binary_dim = 8
# binary = np.unpackbits(
#     np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
# for i in range(largest_number):
#     int2binary[i] = binary[i]
alpha = 0.1       # learning rate
input_dim = 2     # two input bits per time step (one from each operand)
hidden_dim = 10
output_dim = 1    # one predicted difference bit per time step


def predict(session, a_int, b_int):
    # A single training step on a fixed pair also prints its prediction;
    # the weight arrays inside `session` are updated in place.
    train(session, times=1, a_int=a_int, b_int=b_int)
    return session
def train(session, times=10000, a_int=None, b_int=None):
    # Reuse the weights stored in `session` if any are nonzero; otherwise
    # initialize them uniformly in [-1, 1).
    synapse_0 = session[0] if session[0].any() else 2 * np.random.random(
        (input_dim, hidden_dim)
    ) - 1
    synapse_1 = session[1] if session[1].any() else 2 * np.random.random(
        (hidden_dim, output_dim)
    ) - 1
    synapse_h = session[2] if session[2].any() else 2 * np.random.random(
        (hidden_dim, hidden_dim)
    ) - 1

    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)

    for j in range(times):
        # Sample a_int >= b_int so the difference stays non-negative.
        # b_int is drawn from [0, a_int], which also avoids the
        # np.random.randint(0) error the original code hit when a_int was 0.
        a_int = a_int if a_int is not None else \
            np.random.randint(largest_number)
        a = int2binary[a_int]
        b_int = b_int if b_int is not None else \
            np.random.randint(a_int + 1)
        b = int2binary[b_int]

        c_int = a_int - b_int
        c = int2binary[c_int]
        d = np.zeros_like(c)  # the network's predicted bits

        overall_error = 0
        layer_2_deltas = []
        layer_1_values = [np.zeros(hidden_dim)]

        # Forward pass, least significant bit first.
        for position in range(binary_dim):
            X = np.array([[
                a[binary_dim - position - 1],
                b[binary_dim - position - 1]
            ]])
            y = np.array([[
                c[binary_dim - position - 1]
            ]]).T

            layer_1 = sigmoid(
                np.dot(X, synapse_0) +
                np.dot(layer_1_values[-1], synapse_h)
            )
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))

            layer_2_error = y - layer_2
            layer_2_deltas.append(
                layer_2_error * sigmoid_output_to_derivative(layer_2)
            )
            overall_error += np.abs(layer_2_error[0])

            d[binary_dim - position - 1] = np.round(layer_2[0][0])
            layer_1_values.append(copy.deepcopy(layer_1))

        # Backpropagation through time, most significant bit first.
        future_layer_1_delta = np.zeros(hidden_dim)
        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]
            layer_2_delta = layer_2_deltas[-position - 1]
            layer_1_delta = (
                future_layer_1_delta.dot(synapse_h.T) +
                layer_2_delta.dot(synapse_1.T)
            ) * sigmoid_output_to_derivative(layer_1)

            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T \
                .dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)

            future_layer_1_delta = layer_1_delta

        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha

        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0

        if j % 1000 == 0:
            print('Error:', overall_error)
            print('Pred:', d)
            print('True:', c)
            out = 0
            for i, x in enumerate(reversed(d)):
                out += x * pow(2, i)
            print('%s - %s = %s' % (a_int, b_int, out))
            print('------------')

        a_int = None
        b_int = None

    return synapse_0, synapse_1, synapse_h
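
# A minimal usage sketch (assumed; not part of the original file). For the
# subtraction variant the first operand must be >= the second, since
# int2binary holds no negative numbers.
if __name__ == '__main__':
    session = train((
        np.zeros((input_dim, hidden_dim)),
        np.zeros((hidden_dim, output_dim)),
        np.zeros((hidden_dim, hidden_dim)),
    ), times=10000)
    predict(session, 4321, 1234)  # prints the network's prediction for 4321 - 1234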
# Keras binary classifier on the Pima Indians diabetes dataset.
from keras.layers import Dense
from keras.models import Sequential
from sklearn.metrics import confusion_matrix
import numpy as np

# Fix the random seed for reproducibility.
seed = 0
np.random.seed(seed)

# Load the Pima Indians diabetes dataset.
dataset = np.loadtxt("datasets/pima-indians-diabetes.data.csv", delimiter=",")

# Split into input (X) and output (y) variables.
X = dataset[:, 0:8]
y = dataset[:, 8]

# 70/30 train/hold-out split.
split = round(len(y) * .7)
X0, y0 = X[:split], y[:split]
X1, y1 = X[split:], y[split:]

model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform',
                activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
model.compile(
    loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the 70% split only (the original fit on all of X, which leaks the
# hold-out rows into training). `epochs`/`kernel_initializer` replace the
# Keras 1 names `nb_epoch`/`init`.
model.fit(X0, y0, epochs=150, batch_size=10, verbose=0)

# Training-set accuracy as reported by Keras.
scores = model.evaluate(X0, y0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

# Manual accuracy count and confusion matrix on the hold-out split.
predictions = [round(x[0]) for x in model.predict(X1)]
t = 0
f = 0
for i, prediction in enumerate(predictions):
    if prediction == y1[i]:
        t += 1
    else:
        f += 1
accuracy = t / (t + f)
cf_matrix = confusion_matrix(y1, predictions)
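
# A short follow-up (assumed; not in the original file): the script computes
# `accuracy` and `cf_matrix` but never shows them, so print both. Rows of the
# confusion matrix are true classes, columns are predicted classes.
print("Hold-out accuracy: %.2f%%" % (accuracy * 100))
print("Confusion matrix:")
print(cf_matrix)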
# Two-layer neural network trained from scratch on sklearn's make_moons data.
import numpy as np
from sklearn import datasets

np.random.seed(0)
X, y = datasets.make_moons(2000, noise=0.20)

num_examples = len(X)  # training set size
nn_input_dim = 2       # input layer dimensionality
nn_output_dim = 2      # output layer dimensionality

# Gradient descent parameters (picked by hand)
epsilon = 0.0005     # learning rate for gradient descent
reg_lambda = 0.0001  # regularization strength


# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the loss
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (optional)
    data_loss += reg_lambda / 2 * (
        np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1. / num_examples * data_loss


# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)


# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(nn_hdim, num_passes=200000, print_loss=False):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end
    model = {}

    # Gradient descent. For each batch...
    for i in range(num_passes):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Backpropagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # Optionally print the loss.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))

    return model
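
# A minimal usage sketch (assumed; not part of the original file): fit a
# 3-hidden-unit model with fewer passes than the default and report its
# training-set accuracy via predict().
if __name__ == '__main__':
    model = build_model(nn_hdim=3, num_passes=20000, print_loss=True)
    train_accuracy = np.mean(predict(model, X) == y)
    print("Training accuracy: %.3f" % train_accuracy)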