Playground to develop neural-network-based landing-page optimisations.
'''
state  = bandit number = user environment
action = bandit arm    = type of landing page
'''
import numpy as np
import random
from tqdm import tqdm
import tensorflow as tf
import tensorflow.contrib.slim as slim

# TODO: IMBALANCE IN CLASSES
size = [3, 2, 3, 3]
class WebsiteClass():
    def __init__(self):
        self.state = 0
        # We want to find the argmax of this complex system, or at least
        # approach it. A plain uniform distribution doesn't represent a
        # realistic distribution of conversion rates, so the probabilities
        # are squeezed into a narrow band just below 1.
        # self.bandits = np.random.rand(*size) / 100 + 0.95
        self.bandits = [np.random.random(x) / 50 + 0.95 for x in size]
        self.num_actions = len(size)

    def pullArm(self, action):
        # Average the conversion probabilities of the chosen arm of every bandit.
        prob = 0
        for i in range(self.num_actions):
            prob += self.bandits[i][action[i]]
        prob = prob / self.num_actions
        # A conversion (probability `prob`) pays 50, a bounce pays -1.
        return 50 if random.random() < prob else -1
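
# The agent below is a small policy-gradient network: a trainable dummy state is
# fed through one shared hidden layer, and a separate sigmoid head per bandit
# scores that bandit's arms; the greedy action is the argmax of each head.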
class agent():
    def __init__(self, lr, a_size):
        # These lines establish the feed-forward part of the network:
        # the agent takes a (dummy) state and produces one action per bandit.
        self.DummyState = tf.Variable([[0.1]])
        output = slim.fully_connected(self.DummyState, a_size,
                                      biases_initializer=None,
                                      weights_initializer=tf.ones_initializer())
        self.res = [tf.squeeze(slim.fully_connected(output, x,
                                                    biases_initializer=None,
                                                    activation_fn=tf.nn.sigmoid,
                                                    weights_initializer=tf.ones_initializer(),
                                                    scope='results' + str(i)))
                    for i, x in enumerate(size)]
        self.chosen_actions = [tf.argmax(x, output_type=tf.int32) for x in self.res]

        # Training procedure: feed the reward and the chosen actions into the
        # network to compute the loss and use it to update the weights.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[len(size)], dtype=tf.int32)
        self.responsible_weights = [
            tf.cast(tf.slice(self.res[i], [self.action_holder[i]], [1]), dtype=tf.float32)
            for i, x in enumerate(size)]
        self.losses = [-tf.log(x) * self.reward_holder for x in self.responsible_weights]
        self.loss = tf.reduce_mean(self.losses)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss, var_list=tf.trainable_variables())
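
# Training: epsilon-greedy exploration against the simulated website, followed by
# a REINFORCE-style update, -log(p_chosen_arm) * reward, on every bandit's head.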
if __name__ == "__main__":
    tf.reset_default_graph()
    total_episodes = 20000
    e = 0.5  # probability of taking a random, exploratory action
    with tf.Session() as sess:
        cBandit = WebsiteClass()  # Load the bandits.
        myAgent = agent(lr=0.001, a_size=10)
        init = tf.global_variables_initializer()
        sess.run(init)
        optimal = np.zeros(cBandit.num_actions)
        j = 0
        for i in tqdm(range(total_episodes)):
            action = [random.randint(0, x - 1) for x in size] if np.random.rand(1) < e \
                else sess.run(myAgent.chosen_actions)
            reward = cBandit.pullArm(action)
            # Count how often the chosen arm matches the true best arm of each bandit.
            for q in range(cBandit.num_actions):
                optimal[q] += 1 if np.argmax(cBandit.bandits[q]) == action[q] else 0
            feed_dict = {myAgent.reward_holder: [reward], myAgent.action_holder: action}
            _, loss, _, _ = sess.run([myAgent.update, myAgent.loss,
                                      myAgent.chosen_actions, myAgent.res],
                                     feed_dict=feed_dict)
            j += 1
        # Evaluate the greedy policy after training.
        action, res = sess.run([myAgent.chosen_actions, myAgent.res])
        prob = 0
        for i in range(cBandit.num_actions):
            print(f'I triggered bandit {i} with action {action[i]}, leading to a {cBandit.bandits[i][action[i]]:5.6f} result')
            print(f'This guess for bandit {i} was {action[i] == np.argmax(cBandit.bandits[i])}')
            prob = prob + cBandit.bandits[i][action[i]]
        prob = prob / cBandit.num_actions
        print(f'Average conversion probability of the chosen combination: {prob:5.6f}')
        print(cBandit.bandits)