Created
November 17, 2017 07:52
-
-
Save robinvanemden/e24074b4d13d4de0a3ea1b67bfe22cc1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from numpy import linalg as la | |
DIM = 10.0 | |
GRAPH_NUM = 520 | |
class arm(object): | |
def __init__(self, no, x): | |
self.no = no | |
self.x = x | |
self.n = 0 | |
self.reward = 0.0 | |
self.phi = np.zeros((DIM, 1)) | |
def __str__(self): | |
return str(self.no) + ": " + str(self.x) | |
def update(self, reward): | |
self.n += 1 | |
self.reward += reward | |
def pull_arm(arm, theta_star): | |
return np.squeeze(np.dot(arm.x, theta_star)) | |
def calc_arm(arm, theta): | |
return np.squeeze(np.dot(arm.x, theta_star)) | |
def make_arm(num_arms): | |
arms = [] | |
for i in range(num_arms): | |
arms.append(arm(i, np.random.rand(1, DIM) / DIM)) | |
return arms | |
def draw_regret_graph(pulled_arms, mu_star, func, n1, n2, typename): | |
cummlative_regrets = [] | |
average_regrets = [] | |
cummulative_regret = 0 | |
for i, arm in enumerate(pulled_arms): | |
regret = np.squeeze(mu_star - func(arm)) | |
cummulative_regret += regret | |
cummlative_regrets.append(cummulative_regret) | |
average_regrets.append(cummulative_regret / (i+1.0)) | |
#plt.subplot(GRAPH_NUM + n1) | |
#plt.plot(range(len(cummlative_regrets)), cummlative_regrets) | |
#plt.title("cummlative regret (%s)" % typename) | |
##plt.ylim(0, 100) | |
#plt.xscale("log") | |
#plt.subplot(GRAPH_NUM + n2) | |
#plt.plot(range(len(average_regrets)), average_regrets) | |
#plt.title("average regret (%s)"% typename) | |
##plt.ylim(0, 0.1) | |
#plt.xscale("log") | |
return [cummlative_regrets, average_regrets] | |
class linucb_algorithm(object): | |
def __init__(self): | |
self.A = np.identity(DIM) | |
self.b = np.zeros((DIM, 1)) | |
self.A_inv = np.identity(DIM) | |
self.theta = self.A_inv.dot(self.b) | |
self.pulled_arms = [] | |
def select(self, arms, n): | |
max_linucb_comparator = lambda arm: np.squeeze(np.dot(arm.x, self.theta) | |
+ np.sqrt(np.dot(arm.x, self.A_inv.dot(np.transpose(arm.x))))) | |
return max(arms, key=max_linucb_comparator) | |
def update(self, arm, reward): | |
self.A = self.A + np.outer(arm.x, arm.x) | |
self.b = self.b + reward * np.transpose(arm.x) | |
self.A_inv = la.inv(self.A) | |
self.theta = self.A_inv.dot(self.b) | |
self.pulled_arms.append(arm) | |
class ucb1_algorithm(object): | |
def __init__(self): | |
self.pulled_arms = [] | |
def select(self, arms, n): | |
max_ucb_comparator = lambda arm: 100000 if (arm.n == 0) else (arm.reward / arm.n + np.sqrt(2 * np.log(n) / arm.n)) | |
return max(arms, key=max_ucb_comparator) | |
def update(self, arm, reward): | |
self.pulled_arms.append(arm) | |
arm.update(reward) | |
class flinucb_algorithm(object): | |
def __init__(self, dev_type=""): | |
self.theta = np.zeros((DIM, 1)) | |
self.pairs = [] | |
self.pulled_arms = [] | |
self.dev_type = dev_type | |
def select(self, arms, n): | |
n = n + 1 | |
max_linucb_sgd_comparator = lambda arm: 0 | |
if self.dev_type == "n/": | |
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta) | |
+ np.sqrt( np.dot(arm.x, arm.phi) / n ) ) | |
elif self.dev_type == "n*": | |
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta) | |
+ np.sqrt( np.dot(arm.x, arm.phi) * n ) ) | |
else: | |
max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta) | |
+ np.sqrt( np.dot(arm.x, arm.phi) ) ) | |
if self.pairs: | |
gamma_n = 1.0 / (100.0 + n) | |
lambda_n = 1.0 / (n ** (1 - 0.6)) | |
pair = random.choice(self.pairs) | |
self.theta = self.theta + gamma_n * ( | |
(pair[1] - np.dot(pair[0], self.theta)) * np.transpose(pair[0]) - | |
lambda_n * self.theta) | |
for arm in arms: | |
arm.phi = arm.phi + gamma_n * ( np.transpose(arm.x) / n - | |
( np.dot(pair[0], arm.phi) ) * np.transpose(pair[0]) ) | |
return max(arms, key=max_linucb_sgd_comparator) | |
def update(self, arm, reward): | |
self.pairs.append((arm.x, reward)) | |
self.pulled_arms.append(arm) | |
def trial(num_trials, arms, puller, algorithm): | |
trials_vs_time = [] | |
start = time.time() | |
for n in range(num_trials): | |
arm = algorithm.select(arms, n) | |
algorithm.update(arm, puller(arm)) | |
trials_vs_time.append(time.time() - start)# | |
return trials_vs_time | |
if __name__ == '__main__': | |
np.random.seed(42) | |
num_armsets = 10 | |
num_trials = 10000 | |
num_arms = 25 | |
linucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials]) | |
flinucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials]) | |
flinucbmn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials]) | |
flinucbdn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials]) | |
ucb1_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials]) | |
times = np.array([0.0] * num_trials) | |
start = time.time() | |
for armset in range(num_armsets): | |
arms = make_arm(num_arms) | |
theta = np.random.randn(DIM, 1) | |
pull = lambda arm: pull_arm(arm, theta) | |
#for arm in arms: | |
# print ("arm %d: x norm: %f, mu=%f" % (arm.no, la.norm(arm.x), np.squeeze(np.dot(arm.x, theta_star)))) | |
# best arm | |
e_arm = lambda arm: np.squeeze( np.dot(arm.x, theta) ) | |
best_arm = max(arms, key=e_arm) | |
best_mu = e_arm(best_arm) | |
flinucb = flinucb_algorithm() | |
flinucbdn = flinucb_algorithm("n/") | |
flinucbmn = flinucb_algorithm("n*") | |
linucb = linucb_algorithm() | |
ucb1 = ucb1_algorithm() | |
times += trial(num_trials, arms, pull, linucb) | |
#times += trial(num_trials, arms, pull, flinucb) | |
#times += trial(num_trials, arms, pull, flinucbmn) | |
#times += trial(num_trials, arms, pull, flinucbdn) | |
#times += trial(num_trials, arms, pull, ucb1) | |
#linucb_regrets += draw_regret_graph( linucb .pulled_arms, best_mu, e_arm, | |
# 1, 2, "LinUCB") | |
#flinucb_regrets += draw_regret_graph(flinucb.pulled_arms, best_mu, e_arm, | |
# 3, 4, "fLinUCB") | |
#flinucbmn_regrets += draw_regret_graph(flinucbmn.pulled_arms, best_mu, e_arm, | |
# 5, 6, "fLinUCB (n*)") | |
#flinucbdn_regrets += draw_regret_graph(flinucbdn.pulled_arms, best_mu, e_arm, | |
# 7, 8, "fLinUCB (n/)") | |
#ucb1_regrets += draw_regret_graph(ucb1.pulled_arms, best_mu, e_arm, | |
# 9, 10, "UCB1") | |
for i, elapsed in enumerate(times): | |
print("%d %f" % (i, elapsed / float(num_armsets))) | |
#print("# i,LinUCB,fLinUCB-GD,fLinUCB-GD(*),fLinUCB-GD(/),UCB1") | |
#for i in range(len(flinucb_regrets[0])): | |
# print("%d %f %f %f %f %f %f %f %f %f %f" % | |
# (i, | |
# linucb_regrets[0][i]/float(num_armsets), | |
# flinucb_regrets[0][i]/float(num_armsets), | |
# flinucbmn_regrets[0][i]/float(num_armsets), | |
# flinucbdn_regrets[0][i]/float(num_armsets), | |
# ucb1_regrets[0][i]/float(num_armsets), | |
# linucb_regrets[1][i]/float(num_armsets), | |
# flinucb_regrets[1][i]/float(num_armsets), | |
# flinucbmn_regrets[1][i]/float(num_armsets), | |
# flinucbdn_regrets[1][i]/float(num_armsets), | |
# ucb1_regrets[1][i]/float(num_armsets))) | |
#print ("best arm: %d" % best_arm.no) | |
#print ("last arm ( LinUCB): %d" % linucb.pulled_arms[-1].no) | |
#print ("last arm (fLinUCB): %d" % flinucb.pulled_arms[-1].no) | |
#print ("last arm ( UCB): %d" % ucb1.pulled_arms[-1].no) | |
#draw_regret_graph(linucb.pulled_arms, best_mu, bound_comparator, 1, 2, "LinUCB") | |
#draw_regret_graph(flinucb.pulled_arms, best_mu, bound_comparator, 5, 6, "fLinUCB") | |
#draw_regret_graph(ucb1.pulled_arms, best_mu, bound_comparator, 3, 4, "UCB") | |
#plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment