Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import random
# Explore-then-exploit ad selection.
#
# The first 10% of users are each shown a uniformly random ad; the ad with
# the highest observed click rate in that sample is then shown to every
# remaining user. The total number of clicks collected is printed at the end.
#
# NOTE(review): `df` is defined outside this snippet. It is presumably a
# pandas DataFrame with one row per user and one column per ad, holding 1 if
# that user would click that ad and 0 otherwise -- confirm against the loader.

# N: Number of users
N = df.shape[0]
# bandits: Number of ads
bandits = df.shape[1]
# How many times each ad was clicked
# (the counters are sized by `bandits`, so it has to be assigned first)
ad_rewards = [0] * bandits
# How many times each ad was selected
ad_selection = [0] * bandits
ads_selected = []
total_reward = 0

# Fetch the underlying array once instead of on every loop iteration.
rewards_matrix = df.values
# Number of users in the random-exploration phase (10% of users).
explore_count = round(0.1 * N)

# For 10% of users:
for n in range(explore_count):
    # Choosing a random ad
    ad = random.randrange(bandits)
    ads_selected.append(ad)
    ad_selection[ad] += 1
    # Checking if ad was clicked
    reward = rewards_matrix[n, ad]
    ad_rewards[ad] += reward
    total_reward += reward

# Finding the best ad yet, based on click %age.
# An ad that was never drawn during exploration has no evidence, so its rate
# is taken as 0 rather than dividing by zero.
ad_reward_rates_yet = [
    clicks / shows if shows else 0.0
    for clicks, shows in zip(ad_rewards, ad_selection)
]
best_ad_yet = ad_reward_rates_yet.index(max(ad_reward_rates_yet))

# Showing that ad to remaining users
for n in range(explore_count, N):
    # Score the ad that is actually shown (and recorded), not a fixed index.
    ad = best_ad_yet
    ads_selected.append(ad)
    reward = rewards_matrix[n, ad]
    total_reward += reward

print(total_reward)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment