Skip to content

Instantly share code, notes, and snippets.

@robinvanemden
Created November 17, 2017 07:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robinvanemden/e24074b4d13d4de0a3ea1b67bfe22cc1 to your computer and use it in GitHub Desktop.
Save robinvanemden/e24074b4d13d4de0a3ea1b67bfe22cc1 to your computer and use it in GitHub Desktop.
import random
import time
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as la
DIM = 10.0
GRAPH_NUM = 520
class arm(object):
def __init__(self, no, x):
self.no = no
self.x = x
self.n = 0
self.reward = 0.0
self.phi = np.zeros((DIM, 1))
def __str__(self):
return str(self.no) + ": " + str(self.x)
def update(self, reward):
self.n += 1
self.reward += reward
def pull_arm(arm, theta_star):
return np.squeeze(np.dot(arm.x, theta_star))
def calc_arm(arm, theta):
return np.squeeze(np.dot(arm.x, theta_star))
def make_arm(num_arms):
arms = []
for i in range(num_arms):
arms.append(arm(i, np.random.rand(1, DIM) / DIM))
return arms
def draw_regret_graph(pulled_arms, mu_star, func, n1, n2, typename):
cummlative_regrets = []
average_regrets = []
cummulative_regret = 0
for i, arm in enumerate(pulled_arms):
regret = np.squeeze(mu_star - func(arm))
cummulative_regret += regret
cummlative_regrets.append(cummulative_regret)
average_regrets.append(cummulative_regret / (i+1.0))
#plt.subplot(GRAPH_NUM + n1)
#plt.plot(range(len(cummlative_regrets)), cummlative_regrets)
#plt.title("cummlative regret (%s)" % typename)
##plt.ylim(0, 100)
#plt.xscale("log")
#plt.subplot(GRAPH_NUM + n2)
#plt.plot(range(len(average_regrets)), average_regrets)
#plt.title("average regret (%s)"% typename)
##plt.ylim(0, 0.1)
#plt.xscale("log")
return [cummlative_regrets, average_regrets]
class linucb_algorithm(object):
def __init__(self):
self.A = np.identity(DIM)
self.b = np.zeros((DIM, 1))
self.A_inv = np.identity(DIM)
self.theta = self.A_inv.dot(self.b)
self.pulled_arms = []
def select(self, arms, n):
max_linucb_comparator = lambda arm: np.squeeze(np.dot(arm.x, self.theta)
+ np.sqrt(np.dot(arm.x, self.A_inv.dot(np.transpose(arm.x)))))
return max(arms, key=max_linucb_comparator)
def update(self, arm, reward):
self.A = self.A + np.outer(arm.x, arm.x)
self.b = self.b + reward * np.transpose(arm.x)
self.A_inv = la.inv(self.A)
self.theta = self.A_inv.dot(self.b)
self.pulled_arms.append(arm)
class ucb1_algorithm(object):
def __init__(self):
self.pulled_arms = []
def select(self, arms, n):
max_ucb_comparator = lambda arm: 100000 if (arm.n == 0) else (arm.reward / arm.n + np.sqrt(2 * np.log(n) / arm.n))
return max(arms, key=max_ucb_comparator)
def update(self, arm, reward):
self.pulled_arms.append(arm)
arm.update(reward)
class flinucb_algorithm(object):
def __init__(self, dev_type=""):
self.theta = np.zeros((DIM, 1))
self.pairs = []
self.pulled_arms = []
self.dev_type = dev_type
def select(self, arms, n):
n = n + 1
max_linucb_sgd_comparator = lambda arm: 0
if self.dev_type == "n/":
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
+ np.sqrt( np.dot(arm.x, arm.phi) / n ) )
elif self.dev_type == "n*":
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
+ np.sqrt( np.dot(arm.x, arm.phi) * n ) )
else:
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
+ np.sqrt( np.dot(arm.x, arm.phi) ) )
if self.pairs:
gamma_n = 1.0 / (100.0 + n)
lambda_n = 1.0 / (n ** (1 - 0.6))
pair = random.choice(self.pairs)
self.theta = self.theta + gamma_n * (
(pair[1] - np.dot(pair[0], self.theta)) * np.transpose(pair[0]) -
lambda_n * self.theta)
for arm in arms:
arm.phi = arm.phi + gamma_n * ( np.transpose(arm.x) / n -
( np.dot(pair[0], arm.phi) ) * np.transpose(pair[0]) )
return max(arms, key=max_linucb_sgd_comparator)
def update(self, arm, reward):
self.pairs.append((arm.x, reward))
self.pulled_arms.append(arm)
def trial(num_trials, arms, puller, algorithm):
trials_vs_time = []
start = time.time()
for n in range(num_trials):
arm = algorithm.select(arms, n)
algorithm.update(arm, puller(arm))
trials_vs_time.append(time.time() - start)#
return trials_vs_time
if __name__ == '__main__':
np.random.seed(42)
num_armsets = 10
num_trials = 10000
num_arms = 25
linucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
flinucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
flinucbmn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
flinucbdn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
ucb1_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
times = np.array([0.0] * num_trials)
start = time.time()
for armset in range(num_armsets):
arms = make_arm(num_arms)
theta = np.random.randn(DIM, 1)
pull = lambda arm: pull_arm(arm, theta)
#for arm in arms:
# print ("arm %d: x norm: %f, mu=%f" % (arm.no, la.norm(arm.x), np.squeeze(np.dot(arm.x, theta_star))))
# best arm
e_arm = lambda arm: np.squeeze( np.dot(arm.x, theta) )
best_arm = max(arms, key=e_arm)
best_mu = e_arm(best_arm)
flinucb = flinucb_algorithm()
flinucbdn = flinucb_algorithm("n/")
flinucbmn = flinucb_algorithm("n*")
linucb = linucb_algorithm()
ucb1 = ucb1_algorithm()
times += trial(num_trials, arms, pull, linucb)
#times += trial(num_trials, arms, pull, flinucb)
#times += trial(num_trials, arms, pull, flinucbmn)
#times += trial(num_trials, arms, pull, flinucbdn)
#times += trial(num_trials, arms, pull, ucb1)
#linucb_regrets += draw_regret_graph( linucb .pulled_arms, best_mu, e_arm,
# 1, 2, "LinUCB")
#flinucb_regrets += draw_regret_graph(flinucb.pulled_arms, best_mu, e_arm,
# 3, 4, "fLinUCB")
#flinucbmn_regrets += draw_regret_graph(flinucbmn.pulled_arms, best_mu, e_arm,
# 5, 6, "fLinUCB (n*)")
#flinucbdn_regrets += draw_regret_graph(flinucbdn.pulled_arms, best_mu, e_arm,
# 7, 8, "fLinUCB (n/)")
#ucb1_regrets += draw_regret_graph(ucb1.pulled_arms, best_mu, e_arm,
# 9, 10, "UCB1")
for i, elapsed in enumerate(times):
print("%d %f" % (i, elapsed / float(num_armsets)))
#print("# i,LinUCB,fLinUCB-GD,fLinUCB-GD(*),fLinUCB-GD(/),UCB1")
#for i in range(len(flinucb_regrets[0])):
# print("%d %f %f %f %f %f %f %f %f %f %f" %
# (i,
# linucb_regrets[0][i]/float(num_armsets),
# flinucb_regrets[0][i]/float(num_armsets),
# flinucbmn_regrets[0][i]/float(num_armsets),
# flinucbdn_regrets[0][i]/float(num_armsets),
# ucb1_regrets[0][i]/float(num_armsets),
# linucb_regrets[1][i]/float(num_armsets),
# flinucb_regrets[1][i]/float(num_armsets),
# flinucbmn_regrets[1][i]/float(num_armsets),
# flinucbdn_regrets[1][i]/float(num_armsets),
# ucb1_regrets[1][i]/float(num_armsets)))
#print ("best arm: %d" % best_arm.no)
#print ("last arm ( LinUCB): %d" % linucb.pulled_arms[-1].no)
#print ("last arm (fLinUCB): %d" % flinucb.pulled_arms[-1].no)
#print ("last arm ( UCB): %d" % ucb1.pulled_arms[-1].no)
#draw_regret_graph(linucb.pulled_arms, best_mu, bound_comparator, 1, 2, "LinUCB")
#draw_regret_graph(flinucb.pulled_arms, best_mu, bound_comparator, 5, 6, "fLinUCB")
#draw_regret_graph(ucb1.pulled_arms, best_mu, bound_comparator, 3, 4, "UCB")
#plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment