Skip to content

Instantly share code, notes, and snippets.

@dast1
Created February 3, 2018 22:11
Show Gist options
  • Save dast1/57ca4f53070da4447c80a54601f705e5 to your computer and use it in GitHub Desktop.
Save dast1/57ca4f53070da4447c80a54601f705e5 to your computer and use it in GitHub Desktop.
Simple Random Sampling vs Thompson Algorithm
# Question 3
# Import the libraries
import numpy as np
import pandas as pd
# Recreate the dataset function
def recreate_dataset(set_size, conversion_ratios):
    """Build a synthetic 0/1 conversion table with one column per variant.

    Column ``i`` receives ``round(set_size * conversion_ratios[i])`` ones on
    randomly chosen rows. A row that was given to one variant is removed from
    the pool before the next variant is processed, so no row ever holds more
    than a single 1. If fewer rows remain in the pool than a variant asks
    for, that variant only gets the rows that are left.

    Args:
        set_size: Number of rows to generate.
        conversion_ratios: One ratio per variant (column); the fraction of
            ``set_size`` rows that should be set to 1 in that column.

    Returns:
        A list of ``set_size`` rows, each a list of
        ``len(conversion_ratios)`` ints (0 or 1).
    """
    import random

    dataset = [[0] * len(conversion_ratios) for _ in range(set_size)]
    available_idx = list(range(set_size))  # rows not yet given to a variant
    for column, conversion_ratio in enumerate(conversion_ratios):
        # Shuffle the remaining rows and take the required number from the
        # beginning: the prefix of a shuffled list is a random sample.
        random.shuffle(available_idx)
        chosen_idx = available_idx[0:round(set_size * conversion_ratio)]
        for idx in chosen_idx:
            dataset[idx][column] = 1
        # The chosen rows are exactly the prefix of the shuffled pool, so
        # slicing that prefix off leaves the same rows (in the same order) as
        # filtering with ``not in`` would, without the quadratic membership
        # scan over a list.
        available_idx = available_idx[len(chosen_idx):]
    return dataset
# Simple random sampling function
def simple_random(dataset):
    """Simulate choosing a uniformly random variant on every round.

    For each row of ``dataset`` one column is drawn uniformly at random. The
    draw counts as a reward when the cell at that row and column equals 1.

    Args:
        dataset: pandas DataFrame with one row per round and one column per
            variant; a cell equal to 1 is treated as a reward.

    Returns:
        ``[numbers_of_rewards_1, all_selections]`` - two lists with one entry
        per column of ``dataset``: how many rewarded draws the column got,
        and how many times the column was drawn in total.
    """
    from random import randint

    N = len(dataset)
    d = len(dataset.columns)
    # Size the tallies from the dataset itself rather than from a
    # module-level variable, so the function works on any DataFrame.
    all_selections = [0] * d
    numbers_of_rewards_1 = [0] * d
    for n in range(N):
        random_selection = randint(0, d - 1)
        all_selections[random_selection] += 1
        # Positional access: rows are visited by position, so this must not
        # depend on the DataFrame having a default 0..N-1 index.
        if dataset.iat[n, random_selection] == 1:
            numbers_of_rewards_1[random_selection] += 1
    return [numbers_of_rewards_1, all_selections]
# Thompson sampling function
def thompson(dataset):
    """Simulate Thompson sampling over the columns of ``dataset``.

    On every round (row) a value is drawn for each variant from
    ``Beta(rewards_1 + 1, rewards_0 + 1)`` and the variant with the largest
    draw is selected. The cell at that row and column is then read; a value
    of 1 counts as a reward, anything else as a miss.

    Args:
        dataset: pandas DataFrame with one row per round and one column per
            variant; a cell equal to 1 is treated as a reward.

    Returns:
        ``[numbers_of_rewards_1, all_selections]`` - two lists with one entry
        per column of ``dataset``: how many rewards the variant collected,
        and how many times it was selected in total (rewards plus misses).
    """
    import random

    N = len(dataset)
    d = len(dataset.columns)
    outcomes = dataset.values  # looked up once instead of on every round
    numbers_of_rewards_1 = [0] * d
    numbers_of_rewards_0 = [0] * d
    for n in range(N):
        # Pick the variant whose sampled Beta value is the largest.
        variant = 0
        max_random = 0
        for i in range(d):
            random_beta = random.betavariate(
                numbers_of_rewards_1[i] + 1, numbers_of_rewards_0[i] + 1
            )
            if random_beta > max_random:
                max_random = random_beta
                variant = i
        # Update the chosen variant's tallies with the observed outcome.
        if outcomes[n, variant] == 1:
            numbers_of_rewards_1[variant] += 1
        else:
            numbers_of_rewards_0[variant] += 1
    # Every selection ended either as a reward or as a miss.
    all_selections = [
        hits + misses
        for hits, misses in zip(numbers_of_rewards_1, numbers_of_rewards_0)
    ]
    return [numbers_of_rewards_1, all_selections]
# Experiment parameters: per-variant page views and resulting quotes
variant_names = ['baseline', 'variant1', 'variant2', 'variant3', 'variant4']
views = [595, 599, 622, 606, 578]
quotes = [32, 30, 18, 51, 38]
# Conversion ratio of a variant = its quotes divided by its views
conversion_ratios = [
    quote_count / view_count for quote_count, view_count in zip(quotes, views)
]

# Recreate a 30000-row dataset and wrap it in a DataFrame with variant headers
dataset = pd.DataFrame(
    recreate_dataset(30000, conversion_ratios),
    columns=variant_names,
)
all_quotes = dataset.sum()  # column-wise sums of the 0/1 table

# Run both selection strategies on the same dataset
random_selections = simple_random(dataset)
thompson_selections = thompson(dataset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment