Created
June 29, 2018 09:36
-
-
Save crazyleg/e6da94871c3acb1a35b9d3f7d9c61c39 to your computer and use it in GitHub Desktop.
Playground for developing neural-network-based landing page optimisations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
states = bandit number = user environment
action = bandit arm = type of landing page
'''
import numpy as np | |
import random | |
from tqdm import tqdm | |
import tensorflow as tf | |
import tensorflow.contrib.slim as slim | |
# TODO: handle imbalance in classes.
# One entry per bandit; each value is the number of arms that bandit offers.
size = [
    3,
    2,
    3,
    3,
]
class WebsiteClass():
    """Simulated website built from several independent multi-armed bandits.

    Every bandit holds one random value per arm.  An action selects one arm
    from each bandit; the reward depends on the mean of the selected values.
    """

    def __init__(self, sizes=None):
        """Create the bandits with freshly drawn arm values.

        Args:
            sizes: Iterable with the number of arms of each bandit.  When
                omitted, the module-level ``size`` list is used, which is
                what the original no-argument constructor did.
        """
        if sizes is None:
            sizes = size
        sizes = list(sizes)

        self.state = 0
        # We want to find the argmax of this complex system, or approach it.
        # A random distribution doesn't represent a realistic distribution.
        # Each arm value is uniform noise scaled to roughly [0.95, 0.97).
        self.bandits = [np.random.random(n_arms) / 50 + 0.95 for n_arms in sizes]
        self.num_actions = len(sizes)

    def pullArm(self, action):
        """Pull one arm on every bandit and return the resulting reward.

        Args:
            action: Indexable of arm indices; ``action[i]`` selects the arm
                of bandit ``i``.  Only the first ``num_actions`` entries
                are read.

        Returns:
            50 when a uniform draw from ``random.random()`` exceeds the mean
            value of the selected arms, otherwise -1.
        """
        total = 0
        for i in range(self.num_actions):
            total += self.bandits[i][action[i]]
        prob = total / self.num_actions
        # NOTE(review): the bonus is paid when the draw is ABOVE ``prob``, so
        # arms with lower values earn it more often -- confirm this matches
        # callers that treat the argmax arm as the best one.
        return 50 if random.random() > prob else -1
class agent():
    """Policy network that scores every arm of every bandit listed in ``size``."""

    def __init__(self, lr, a_size):
        """Build the forward pass, the loss and the training op.

        Args:
            lr: Learning rate handed to the gradient-descent optimizer.
            a_size: Number of units in the shared first layer.
        """
        # Feed-forward part: a fixed-shape 1x1 dummy state goes through one
        # shared layer, then through one sigmoid head per bandit.
        # NOTE(review): DummyState is a tf.Variable, so it is presumably
        # updated together with the weights -- confirm this is intended.
        self.DummyState = tf.Variable([[0.1]])
        shared_layer = slim.fully_connected(
            self.DummyState,
            a_size,
            biases_initializer=None,
            weights_initializer=tf.ones_initializer(),
        )

        # One output head per bandit, with as many units as that bandit has arms.
        self.res = []
        for head_idx, n_arms in enumerate(size):
            head = slim.fully_connected(
                shared_layer,
                n_arms,
                biases_initializer=None,
                activation_fn=tf.nn.sigmoid,
                weights_initializer=tf.ones_initializer(),
                scope='results' + str(head_idx),
            )
            self.res.append(tf.squeeze(head))

        # Greedy choice: index of the highest-scoring arm of each head.
        self.chosen_actions = []
        for head_scores in self.res:
            self.chosen_actions.append(tf.argmax(head_scores, output_type=tf.int32))

        # Training inputs: the observed reward and the arm taken on each bandit.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[len(size)], dtype=tf.int32)

        # Score that each head assigned to the arm that was actually taken.
        self.responsible_weights = []
        for head_idx in range(len(size)):
            taken_score = tf.slice(self.res[head_idx], [self.action_holder[head_idx]], [1])
            self.responsible_weights.append(tf.cast(taken_score, dtype=tf.float32))

        # Per-head loss is -log(score) * reward; the total is their mean.
        self.losses = []
        for taken_score in self.responsible_weights:
            self.losses.append(-tf.log(taken_score) * self.reward_holder)
        self.loss = tf.reduce_mean(self.losses)

        sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = sgd.minimize(self.loss, var_list=tf.trainable_variables())
def main(total_episodes=20000, epsilon=0.5):
    """Train the agent on a fresh WebsiteClass and print the greedy outcome.

    Args:
        total_episodes: Number of training episodes to run.
        epsilon: Probability of taking a uniformly random action instead of
            the agent's current greedy choice.
    """
    tf.reset_default_graph()

    with tf.Session() as sess:
        cBandit = WebsiteClass()  # Load the bandits.
        myAgent = agent(lr=0.001, a_size=10)
        sess.run(tf.global_variables_initializer())

        # The bandit values never change during training, so the reference
        # arm of each bandit is computed once.
        # NOTE(review): pullArm pays its bonus more often for LOWER arm
        # values, while argmax is used here as the reference -- confirm
        # which of the two is intended.
        reference_arms = [np.argmax(arms) for arms in cBandit.bandits]
        # optimal[q] counts the episodes whose action hit the reference arm
        # of bandit q.
        optimal = np.zeros(cBandit.num_actions)

        for _ in tqdm(range(total_episodes)):
            if np.random.rand() < epsilon:
                action = [random.randint(0, n_arms - 1) for n_arms in size]
            else:
                action = sess.run(myAgent.chosen_actions)

            reward = cBandit.pullArm(action)
            for q, reference in enumerate(reference_arms):
                if reference == action[q]:
                    optimal[q] += 1

            feed_dict = {myAgent.reward_holder: [reward],
                         myAgent.action_holder: action}
            sess.run(myAgent.update, feed_dict=feed_dict)

        # Final evaluation with the purely greedy policy.
        action = sess.run(myAgent.chosen_actions)
        prob = 0
        for i in range(cBandit.num_actions):
            print(f'I triggered {i} bandit with {action[i]} action, leading to {cBandit.bandits[i][action[i]]:5.6f} result')
            print(f'This guess for {i} bandit was {action[i]==np.argmax(cBandit.bandits[i])}')
            prob = prob + cBandit.bandits[i][action[i]]
        prob = prob / cBandit.num_actions

        # These two metrics used to be computed and then discarded.
        print(f'Mean bandit value of the greedy actions: {prob:5.6f}')
        print(f'Share of training episodes that hit the argmax arm, per bandit: {optimal / max(total_episodes, 1)}')
        print(cBandit.bandits)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment