Skip to content

Instantly share code, notes, and snippets.

@duarteguilherme
Created December 9, 2017 17:57
Show Gist options
  • Save duarteguilherme/7b10b9a5f1879929191b1b580e6fc527 to your computer and use it in GitHub Desktop.
Save duarteguilherme/7b10b9a5f1879929191b1b580e6fc527 to your computer and use it in GitHub Desktop.
from keras.models import Model, Input,Sequential
from keras.layers import Dense
from keras.optimizers import Adam # not important as there's no training here.
import numpy as np
from sklearn.datasets import load_digits
import numpy as np
# Load the scikit-learn digits dataset and keep only the samples whose label
# is <= 1, which turns the task into a binary classification problem.
digits = load_digits()
Y = digits['target']
X = digits['data'][Y <= 1]  # filter the features with the full-length label mask
Y = Y[Y <= 1]               # then filter the labels themselves
def logistic(X, w, b):
    """Apply an affine map followed by the logistic sigmoid.

    Computes ``1 / (1 + exp(-(np.dot(X, w) + b)))`` element-wise.

    Args:
        X: Input array passed as the left operand of ``np.dot``.
        w: Weight array passed as the right operand of ``np.dot``.
        b: Bias added (with broadcasting) to the dot product.

    Returns:
        The sigmoid of the affine result, with values in ``[0, 1]``.
    """
    z = np.dot(X, w) + b
    # sigmoid(z) == exp(-log(1 + exp(-z))).  ``logaddexp`` evaluates the inner
    # log without ever forming exp(-z), so large negative ``z`` no longer
    # triggers a floating-point overflow.
    return np.exp(-np.logaddexp(0, -z))
def relu(X, w, b):
    """Apply an affine map followed by a rectified linear unit.

    Computes ``max(np.dot(X, w) + b, 0)`` element-wise.

    Args:
        X: Input array passed as the left operand of ``np.dot``.
        w: Weight array passed as the right operand of ``np.dot``.
        b: Bias added (with broadcasting) to the dot product.

    Returns:
        The affine result with every negative entry replaced by zero.
    """
    pre_activation = np.dot(X, w) + b
    # ``np.maximum`` also works when the affine result is a NumPy scalar
    # (e.g. 1-D ``X`` and ``w``), where masked item assignment would fail.
    return np.maximum(pre_activation, 0)
# Evolution-strategy hyper-parameters.
POPULATION_SIZE = 50   # number of perturbed parameter sets evaluated per iteration
SIGMA = .1             # scale applied to the Gaussian noise in get_weights_try
LEARNING_RATE = .001   # step size of the parameter update

# Parameters of a network with one hidden layer of 8 units and a single
# output, all drawn from a standard normal distribution.
# Order in the list: hidden weights, hidden bias, output weights, output bias.
weights = [
    np.random.randn(X.shape[1], 8),
    np.random.randn(1, 8),
    np.random.randn(8, 1),
    np.random.randn(1, 1),
]
w1, b1, w2, b2 = weights

# Targets as a column so they line up with the (n_samples, 1) predictions.
solution = Y[:, np.newaxis]
inp = X
def get_weights_try(w, p):
    """Return a perturbed copy of the parameter list.

    Args:
        w: Sequence of parameter arrays.
        p: Sequence of noise arrays; ``p[k]`` perturbs ``w[k]``.

    Returns:
        A new list whose k-th entry is ``w[k] + SIGMA * p[k]``.
    """
    return [w[position] + SIGMA * noise for position, noise in enumerate(p)]
def get_reward(weights):
    """Score a parameter set on the module-level data ``X`` / ``solution``.

    Args:
        weights: Sequence ``(w1, b1, w2, b2)`` of hidden-layer and
            output-layer parameters.

    Returns:
        The negative sum of squared differences between ``solution`` and the
        network output, so the best achievable reward is zero.
    """
    hidden_w, hidden_b, out_w, out_b = weights
    hidden = relu(X, hidden_w, hidden_b)
    prediction = logistic(hidden, out_w, out_b)
    residual = solution - prediction
    return -np.sum(np.square(residual))
def get_accuracy(weights):
    """Return the fraction of samples classified correctly.

    The network output on the module-level ``X`` is thresholded at 0.5 and
    compared element-wise with ``solution``.

    Args:
        weights: Sequence ``(w1, b1, w2, b2)`` of hidden-layer and
            output-layer parameters.

    Returns:
        Mean of the element-wise equality between ``solution`` and the
        thresholded predictions.
    """
    hidden_w, hidden_b, out_w, out_b = weights
    probabilities = logistic(relu(X, hidden_w, hidden_b), out_w, out_b)
    # Both masks are taken from the raw probabilities before overwriting them.
    is_positive = probabilities >= .5
    is_negative = probabilities < .5
    probabilities[is_positive] = 1
    probabilities[is_negative] = 0
    return np.mean(solution == probabilities)
# Evolution-strategies training loop: each iteration samples a population of
# Gaussian perturbations, scores every perturbed parameter set, and moves the
# parameters along the reward-weighted sum of the perturbations.
for iteration in range(800):
    if iteration % 20 == 0:
        print('Iter: ' + str(iteration) + " Reward: " + str(get_reward(weights)) +
              " Accuracy: " + str(get_accuracy(weights)))

    # One standard-normal noise array per parameter array, per population member.
    population = [
        [np.random.randn(*w.shape) for w in weights]
        for _ in range(POPULATION_SIZE)
    ]

    # Reward obtained by each perturbed copy of the current parameters.
    rewards = np.array([
        get_reward(get_weights_try(weights, noise)) for noise in population
    ])

    # Standardise the rewards.  When every member scores the same reward the
    # standard deviation is zero; dividing by it would produce NaN and corrupt
    # all parameters for the rest of the run, so use a zero signal (no update)
    # in that case instead.
    reward_std = np.std(rewards)
    if reward_std > 0:
        rewards = (rewards - np.mean(rewards)) / reward_std
    else:
        rewards = np.zeros_like(rewards)

    for index, w in enumerate(weights):
        # Stack this parameter's noise across the population: (POPULATION_SIZE, *w.shape).
        A = np.array([member[index] for member in population])
        # dot(A.T, rewards).T is the reward-weighted sum of the noise arrays,
        # with the same shape as ``w``.
        weights[index] = w + LEARNING_RATE/(POPULATION_SIZE*SIGMA) * np.dot(A.T, rewards).T
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment