Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
def calculate_upper_bound(wins, num_selections, n):
    """Return the upper confidence bound for a single ad.

    The bound is the observed average reward plus an exploration term
    ``sqrt(3/2 * ln(n + 1) / num_selections)``, which shrinks as the ad is
    selected more often and grows slowly with the round number.

    Args:
        wins: Number of wins (rewards) recorded for the ad.
        num_selections: Number of times the ad has been selected so far.
        n: Zero-based index of the current round.

    Returns:
        The upper confidence bound as a float, or ``math.inf`` when
        ``num_selections`` is 0.
    """
    if num_selections == 0:
        # No observations yet: the average is undefined, so treat the bound
        # as unbounded instead of failing with ZeroDivisionError.
        return math.inf
    average_reward = wins / num_selections
    delta_i = math.sqrt(3 / 2 * math.log(n + 1) / num_selections)
    return average_reward + delta_i
import math
import random
# Upper Confidence Bound simulation: for every round, show the ad whose
# upper confidence bound is currently the highest, then record the outcome.
# Relies on `d`, `N` and `df` being defined before this point.

# Ads that got shown, in round order
ads_selected = []
# Number of selections for each ad
number_of_selections_of_ad = [0] * d
# Number of times each ad got clicked
number_of_wins = [0] * d
total_reward = 0
# Look the reward table up once rather than on every round
# (presumably `df` is a pandas DataFrame with one row per user and one
# column per ad -- confirm against where `df` is loaded).
reward_table = df.values
# For each user
for n in range(N):
    selected_ad = 0
    max_upper_bound = 0
    # Iterate over ads
    for i in range(d):
        if number_of_selections_of_ad[i] > 0:
            # Ad has been selected at least once: use its confidence bound
            upper_bound = calculate_upper_bound(
                number_of_wins[i], number_of_selections_of_ad[i], n
            )
        else:
            # Ad has never been selected: a very large bound forces it to
            # be tried before any ad that already has data
            upper_bound = 1e10
        # Strict comparison keeps the lowest-indexed ad on ties
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            selected_ad = i
    # Select the ad with the highest upper bound and count the selection
    ads_selected.append(selected_ad)
    number_of_selections_of_ad[selected_ad] += 1
    # Check whether the ad was clicked
    reward = reward_table[n, selected_ad]
    if reward == 1:
        number_of_wins[selected_ad] += 1
    total_reward += reward
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment