# SidJain1412/ucb_reinforcement.py

## ucb_reinforcement.py
def calculate_upper_bound(wins, num_selections, n):
    """Return the upper confidence bound score for a single ad.

    The score is the observed average reward plus a confidence term
    ``sqrt(1.5 * ln(n + 1) / num_selections)``.

    Args:
        wins: Total reward collected by the ad so far.
        num_selections: Number of times the ad has been selected so far.
        n: Zero-based index of the current round; ``n + 1`` is what goes
            into the logarithm.

    Returns:
        The upper bound as a float. When ``num_selections`` is 0 there is
        no average to compute, so ``math.inf`` is returned instead of
        raising ``ZeroDivisionError``.
    """
    if num_selections == 0:
        # An ad that was never shown has an unbounded confidence interval.
        return math.inf
    average_reward = wins / num_selections
    # 1.5 is written as a float literal so the factor does not depend on
    # how the interpreter divides the integers 3 and 2.
    delta_i = math.sqrt(1.5 * math.log(n + 1) / num_selections)
    return average_reward + delta_i


import math
import random

# Upper Confidence Bound (UCB) ad selection, one ad chosen per round.
# NOTE(review): `d`, `N` and `df` are not defined in this part of the file.
# Presumably d = number of ads, N = number of rounds (users), and df is a
# pandas DataFrame of 0/1 click outcomes with one row per round and one
# column per ad -- confirm where they are defined.

# Array to store which ads got shown
ads_selected = []
# Number of selections for each ad
number_of_selections_of_ad = [0] * d
# Number of times each ad got clicked
number_of_wins = [0] * d
total_reward = 0

# `df.values` is not modified inside the loop, so it is looked up once
# here rather than on every round.
reward_table = df.values

# For each user
for n in range(N):
    selected_ad = 0
    max_upper_bound = 0
    # Iterate over ads
    for i in range(d):
        # If ad has been selected at least once
        if number_of_selections_of_ad[i] > 0:
            # Calculating upper bound of distribution
            upper_bound = calculate_upper_bound(
                number_of_wins[i], number_of_selections_of_ad[i], n
            )
        # If ad has never been selected
        else:
            # Setting upper bound to be a very large number so that every
            # ad is tried before any ad is repeated
            upper_bound = 1e10

        # Strict '>' keeps the lowest-indexed ad when bounds are tied
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            selected_ad = i

    # Selecting the ad with the highest upper bound, and increasing its no. of selections
    ads_selected.append(selected_ad)
    number_of_selections_of_ad[selected_ad] += 1

    # Checking if the ad was clicked
    reward = reward_table[n, selected_ad]
    if reward == 1:
        number_of_wins[selected_ad] += 1

    total_reward += reward
	def calculate_upper_bound(wins, num_selections, n):
	average_reward = wins / num_selections
	delta_i = math.sqrt(3/2 * math.log(n + 1) / num_selections)
	upper_bound = average_reward + delta_i
	return upper_bound


	import math
	import random

	# Array to store which ads got shown
	ads_selected = []
	# Number of selections for each ad
	number_of_selections_of_ad = [0] * d
	# Number of times each ad got clicked
	number_of_wins = [0] * d
	total_reward = 0

	# For each user
	for n in range(0, N):
	selected_ad = 0
	max_upper_bound = 0
	# Iterate over ads
	for i in range(0, d):
	# If ad has been selected atleast once
	if(number_of_selections_of_ad[i] > 0):
	# Calculating upper bound of distribution
	upper_bound = calculate_upper_bound(number_of_wins[i], number_of_selections_of_ad[i], n)
	# If ad has never been selected
	else:
	# Setting upper bound to be a very large number
	upper_bound = 1e10

	if upper_bound > max_upper_bound:
	max_upper_bound = upper_bound
	selected_ad = i

	# Selecting the ad with the highest upper bound, and increasing its no. of selections
	ads_selected.append(selected_ad)
	number_of_selections_of_ad[selected_ad] = number_of_selections_of_ad[selected_ad] + 1

	# Checking if the ad was clicked
	reward = df.values[n, selected_ad]
	if reward == 1:
	number_of_wins[selected_ad] = number_of_wins[selected_ad] + 1

	total_reward = total_reward + reward