robinvanemden/bandit.py

## bandit.py
import random
import time
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as la

DIM = 10.0

GRAPH_NUM = 520

class arm(object):
    def __init__(self, no, x):
        self.no = no
        self.x = x
        self.n = 0
        self.reward = 0.0
        self.phi = np.zeros((DIM, 1))

    def __str__(self):
        return str(self.no) + ": " + str(self.x)

    def update(self, reward):
        self.n += 1
        self.reward += reward

def pull_arm(arm, theta_star):
    return np.squeeze(np.dot(arm.x, theta_star))

def calc_arm(arm, theta):
    return np.squeeze(np.dot(arm.x, theta_star))

def make_arm(num_arms):
    arms = []

    for i in range(num_arms):
        arms.append(arm(i, np.random.rand(1, DIM) / DIM))

    return arms

def draw_regret_graph(pulled_arms, mu_star, func, n1, n2, typename):
    cummlative_regrets = []
    average_regrets = []

    cummulative_regret = 0

    for i, arm in enumerate(pulled_arms):
        regret = np.squeeze(mu_star - func(arm))
        cummulative_regret += regret

        cummlative_regrets.append(cummulative_regret)
        average_regrets.append(cummulative_regret / (i+1.0))

    #plt.subplot(GRAPH_NUM + n1)
    #plt.plot(range(len(cummlative_regrets)), cummlative_regrets)
    #plt.title("cummlative regret (%s)" % typename)
    ##plt.ylim(0, 100)
    #plt.xscale("log")

    #plt.subplot(GRAPH_NUM + n2)
    #plt.plot(range(len(average_regrets)), average_regrets)
    #plt.title("average regret (%s)"% typename)
    ##plt.ylim(0, 0.1)
    #plt.xscale("log")

    return [cummlative_regrets, average_regrets]

class linucb_algorithm(object):
    def __init__(self):
        self.A           = np.identity(DIM)
        self.b           = np.zeros((DIM, 1))
        self.A_inv       = np.identity(DIM)
        self.theta       = self.A_inv.dot(self.b)
        self.pulled_arms = []

    def select(self, arms, n):
        max_linucb_comparator = lambda arm: np.squeeze(np.dot(arm.x, self.theta)
                + np.sqrt(np.dot(arm.x, self.A_inv.dot(np.transpose(arm.x)))))

        return max(arms, key=max_linucb_comparator)

    def update(self, arm, reward):
        self.A     = self.A + np.outer(arm.x, arm.x)
        self.b     = self.b + reward * np.transpose(arm.x)
        self.A_inv = la.inv(self.A)
        self.theta = self.A_inv.dot(self.b)

        self.pulled_arms.append(arm)

class ucb1_algorithm(object):
    def __init__(self):
        self.pulled_arms = []

    def select(self, arms, n):
        max_ucb_comparator = lambda arm: 100000 if (arm.n == 0) else (arm.reward / arm.n + np.sqrt(2 * np.log(n) / arm.n))

        return max(arms, key=max_ucb_comparator)

    def update(self, arm, reward):
        self.pulled_arms.append(arm)
        arm.update(reward)

class flinucb_algorithm(object):
    def __init__(self, dev_type=""):
        self.theta       = np.zeros((DIM, 1))
        self.pairs       = []
        self.pulled_arms = []
        self.dev_type    = dev_type

    def select(self, arms, n):
        n = n + 1
        max_linucb_sgd_comparator = lambda arm: 0

        if   self.dev_type == "n/":
            max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
                    + np.sqrt( np.dot(arm.x, arm.phi) / n ) )
        elif self.dev_type == "n*":
            max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
                    + np.sqrt( np.dot(arm.x, arm.phi) * n ) )
        else:
            max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
                    + np.sqrt( np.dot(arm.x, arm.phi) ) )

        if self.pairs:
            gamma_n  = 1.0 / (100.0 + n)
            lambda_n = 1.0 / (n ** (1 - 0.6))

            pair = random.choice(self.pairs)
            self.theta = self.theta + gamma_n * (
                    (pair[1] - np.dot(pair[0], self.theta)) * np.transpose(pair[0]) -
                    lambda_n * self.theta)

            for arm in arms:
                arm.phi = arm.phi + gamma_n * ( np.transpose(arm.x) / n -
                        ( np.dot(pair[0], arm.phi) ) * np.transpose(pair[0]) )

        return max(arms, key=max_linucb_sgd_comparator)

    def update(self, arm, reward):
        self.pairs.append((arm.x, reward))
        self.pulled_arms.append(arm)

def trial(num_trials, arms, puller, algorithm):
    trials_vs_time = []

    start = time.time()
    for n in range(num_trials):
        arm = algorithm.select(arms, n)
        algorithm.update(arm, puller(arm))

        trials_vs_time.append(time.time() - start)#

    return trials_vs_time

if __name__ == '__main__':
    np.random.seed(42)

    num_armsets = 10
    num_trials = 10000
    num_arms   = 25

    linucb_regrets    = np.array([[0.0] * num_trials, [0.0] * num_trials])
    flinucb_regrets   = np.array([[0.0] * num_trials, [0.0] * num_trials])
    flinucbmn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
    flinucbdn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
    ucb1_regrets      = np.array([[0.0] * num_trials, [0.0] * num_trials])

    times = np.array([0.0] * num_trials)

    start = time.time()

    for armset in range(num_armsets):
        arms  = make_arm(num_arms)
        theta = np.random.randn(DIM, 1)
        pull  = lambda arm: pull_arm(arm, theta)

        #for arm in arms:
        #    print ("arm %d: x norm: %f, mu=%f" % (arm.no, la.norm(arm.x), np.squeeze(np.dot(arm.x, theta_star))))

        # best arm
        e_arm = lambda arm: np.squeeze( np.dot(arm.x, theta) )
        best_arm = max(arms, key=e_arm)
        best_mu  = e_arm(best_arm)

        flinucb   = flinucb_algorithm()
        flinucbdn = flinucb_algorithm("n/")
        flinucbmn = flinucb_algorithm("n*")
        linucb    = linucb_algorithm()
        ucb1      = ucb1_algorithm()

        times += trial(num_trials, arms, pull,  linucb)
        #times += trial(num_trials, arms, pull, flinucb)
        #times += trial(num_trials, arms, pull, flinucbmn)
        #times += trial(num_trials, arms, pull, flinucbdn)
        #times += trial(num_trials, arms, pull,     ucb1)

        #linucb_regrets += draw_regret_graph( linucb  .pulled_arms, best_mu, e_arm,
        #        1, 2, "LinUCB")

        #flinucb_regrets += draw_regret_graph(flinucb.pulled_arms, best_mu, e_arm,
        #        3, 4, "fLinUCB")

        #flinucbmn_regrets += draw_regret_graph(flinucbmn.pulled_arms, best_mu, e_arm,
        #        5, 6, "fLinUCB (n*)")

        #flinucbdn_regrets += draw_regret_graph(flinucbdn.pulled_arms, best_mu, e_arm,
        #        7, 8, "fLinUCB (n/)")

        #ucb1_regrets += draw_regret_graph(ucb1.pulled_arms, best_mu, e_arm,
        #        9, 10, "UCB1")

    for i, elapsed in enumerate(times):
        print("%d %f" % (i, elapsed / float(num_armsets)))

    #print("# i,LinUCB,fLinUCB-GD,fLinUCB-GD(*),fLinUCB-GD(/),UCB1")
    #for i in range(len(flinucb_regrets[0])):
    #    print("%d %f %f %f %f %f %f %f %f %f %f" %
    #            (i,
    #                linucb_regrets[0][i]/float(num_armsets),
    #                flinucb_regrets[0][i]/float(num_armsets),
    #                flinucbmn_regrets[0][i]/float(num_armsets),
    #                flinucbdn_regrets[0][i]/float(num_armsets),
    #                ucb1_regrets[0][i]/float(num_armsets),

    #                linucb_regrets[1][i]/float(num_armsets),
    #                flinucb_regrets[1][i]/float(num_armsets),
    #                flinucbmn_regrets[1][i]/float(num_armsets),
    #                flinucbdn_regrets[1][i]/float(num_armsets),
    #                ucb1_regrets[1][i]/float(num_armsets)))

    #print ("best arm: %d" % best_arm.no)
    #print ("last arm ( LinUCB): %d" % linucb.pulled_arms[-1].no)
    #print ("last arm (fLinUCB): %d" % flinucb.pulled_arms[-1].no)
    #print ("last arm (    UCB): %d" % ucb1.pulled_arms[-1].no)

    #draw_regret_graph(linucb.pulled_arms,  best_mu, bound_comparator, 1, 2, "LinUCB")
    #draw_regret_graph(flinucb.pulled_arms, best_mu, bound_comparator, 5, 6, "fLinUCB")
    #draw_regret_graph(ucb1.pulled_arms,    best_mu, bound_comparator, 3, 4, "UCB")

    #plt.show()
	import random
	import time
	import matplotlib.pyplot as plt
	import numpy as np
	from numpy import linalg as la

	DIM = 10.0

	GRAPH_NUM = 520

	class arm(object):
	def __init__(self, no, x):
	self.no = no
	self.x = x
	self.n = 0
	self.reward = 0.0
	self.phi = np.zeros((DIM, 1))

	def __str__(self):
	return str(self.no) + ": " + str(self.x)

	def update(self, reward):
	self.n += 1
	self.reward += reward

	def pull_arm(arm, theta_star):
	return np.squeeze(np.dot(arm.x, theta_star))

	def calc_arm(arm, theta):
	return np.squeeze(np.dot(arm.x, theta_star))

	def make_arm(num_arms):
	arms = []

	for i in range(num_arms):
	arms.append(arm(i, np.random.rand(1, DIM) / DIM))

	return arms

	def draw_regret_graph(pulled_arms, mu_star, func, n1, n2, typename):
	cummlative_regrets = []
	average_regrets = []

	cummulative_regret = 0

	for i, arm in enumerate(pulled_arms):
	regret = np.squeeze(mu_star - func(arm))
	cummulative_regret += regret

	cummlative_regrets.append(cummulative_regret)
	average_regrets.append(cummulative_regret / (i+1.0))

	#plt.subplot(GRAPH_NUM + n1)
	#plt.plot(range(len(cummlative_regrets)), cummlative_regrets)
	#plt.title("cummlative regret (%s)" % typename)
	##plt.ylim(0, 100)
	#plt.xscale("log")

	#plt.subplot(GRAPH_NUM + n2)
	#plt.plot(range(len(average_regrets)), average_regrets)
	#plt.title("average regret (%s)"% typename)
	##plt.ylim(0, 0.1)
	#plt.xscale("log")

	return [cummlative_regrets, average_regrets]

	class linucb_algorithm(object):
	def __init__(self):
	self.A = np.identity(DIM)
	self.b = np.zeros((DIM, 1))
	self.A_inv = np.identity(DIM)
	self.theta = self.A_inv.dot(self.b)
	self.pulled_arms = []

	def select(self, arms, n):
	max_linucb_comparator = lambda arm: np.squeeze(np.dot(arm.x, self.theta)
	+ np.sqrt(np.dot(arm.x, self.A_inv.dot(np.transpose(arm.x)))))

	return max(arms, key=max_linucb_comparator)

	def update(self, arm, reward):
	self.A = self.A + np.outer(arm.x, arm.x)
	self.b = self.b + reward * np.transpose(arm.x)
	self.A_inv = la.inv(self.A)
	self.theta = self.A_inv.dot(self.b)

	self.pulled_arms.append(arm)

	class ucb1_algorithm(object):
	def __init__(self):
	self.pulled_arms = []

	def select(self, arms, n):
	max_ucb_comparator = lambda arm: 100000 if (arm.n == 0) else (arm.reward / arm.n + np.sqrt(2 * np.log(n) / arm.n))

	return max(arms, key=max_ucb_comparator)

	def update(self, arm, reward):
	self.pulled_arms.append(arm)
	arm.update(reward)

	class flinucb_algorithm(object):
	def __init__(self, dev_type=""):
	self.theta = np.zeros((DIM, 1))
	self.pairs = []
	self.pulled_arms = []
	self.dev_type = dev_type

	def select(self, arms, n):
	n = n + 1
	max_linucb_sgd_comparator = lambda arm: 0

	if self.dev_type == "n/":
	max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
	+ np.sqrt( np.dot(arm.x, arm.phi) / n ) )
	elif self.dev_type == "n*":
	max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
	+ np.sqrt( np.dot(arm.x, arm.phi) * n ) )
	else:
	max_linucb_sgd_comparator = lambda arm: np.squeeze( np.dot(arm.x, self.theta)
	+ np.sqrt( np.dot(arm.x, arm.phi) ) )

	if self.pairs:
	gamma_n = 1.0 / (100.0 + n)
	lambda_n = 1.0 / (n ** (1 - 0.6))

	pair = random.choice(self.pairs)
	self.theta = self.theta + gamma_n * (
	(pair[1] - np.dot(pair[0], self.theta)) * np.transpose(pair[0]) -
	lambda_n * self.theta)

	for arm in arms:
	arm.phi = arm.phi + gamma_n * ( np.transpose(arm.x) / n -
	( np.dot(pair[0], arm.phi) ) * np.transpose(pair[0]) )

	return max(arms, key=max_linucb_sgd_comparator)

	def update(self, arm, reward):
	self.pairs.append((arm.x, reward))
	self.pulled_arms.append(arm)

	def trial(num_trials, arms, puller, algorithm):
	trials_vs_time = []

	start = time.time()
	for n in range(num_trials):
	arm = algorithm.select(arms, n)
	algorithm.update(arm, puller(arm))

	trials_vs_time.append(time.time() - start)#

	return trials_vs_time

	if __name__ == '__main__':
	np.random.seed(42)

	num_armsets = 10
	num_trials = 10000
	num_arms = 25

	linucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
	flinucb_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
	flinucbmn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
	flinucbdn_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])
	ucb1_regrets = np.array([[0.0] * num_trials, [0.0] * num_trials])

	times = np.array([0.0] * num_trials)

	start = time.time()

	for armset in range(num_armsets):
	arms = make_arm(num_arms)
	theta = np.random.randn(DIM, 1)
	pull = lambda arm: pull_arm(arm, theta)

	#for arm in arms:
	# print ("arm %d: x norm: %f, mu=%f" % (arm.no, la.norm(arm.x), np.squeeze(np.dot(arm.x, theta_star))))

	# best arm
	e_arm = lambda arm: np.squeeze( np.dot(arm.x, theta) )
	best_arm = max(arms, key=e_arm)
	best_mu = e_arm(best_arm)

	flinucb = flinucb_algorithm()
	flinucbdn = flinucb_algorithm("n/")
	flinucbmn = flinucb_algorithm("n*")
	linucb = linucb_algorithm()
	ucb1 = ucb1_algorithm()

	times += trial(num_trials, arms, pull, linucb)
	#times += trial(num_trials, arms, pull, flinucb)
	#times += trial(num_trials, arms, pull, flinucbmn)
	#times += trial(num_trials, arms, pull, flinucbdn)
	#times += trial(num_trials, arms, pull, ucb1)

	#linucb_regrets += draw_regret_graph( linucb .pulled_arms, best_mu, e_arm,
	# 1, 2, "LinUCB")

	#flinucb_regrets += draw_regret_graph(flinucb.pulled_arms, best_mu, e_arm,
	# 3, 4, "fLinUCB")

	#flinucbmn_regrets += draw_regret_graph(flinucbmn.pulled_arms, best_mu, e_arm,
	# 5, 6, "fLinUCB (n*)")

	#flinucbdn_regrets += draw_regret_graph(flinucbdn.pulled_arms, best_mu, e_arm,
	# 7, 8, "fLinUCB (n/)")

	#ucb1_regrets += draw_regret_graph(ucb1.pulled_arms, best_mu, e_arm,
	# 9, 10, "UCB1")

	for i, elapsed in enumerate(times):
	print("%d %f" % (i, elapsed / float(num_armsets)))

	#print("# i,LinUCB,fLinUCB-GD,fLinUCB-GD(*),fLinUCB-GD(/),UCB1")
	#for i in range(len(flinucb_regrets[0])):
	# print("%d %f %f %f %f %f %f %f %f %f %f" %
	# (i,
	# linucb_regrets[0][i]/float(num_armsets),
	# flinucb_regrets[0][i]/float(num_armsets),
	# flinucbmn_regrets[0][i]/float(num_armsets),
	# flinucbdn_regrets[0][i]/float(num_armsets),
	# ucb1_regrets[0][i]/float(num_armsets),

	# linucb_regrets[1][i]/float(num_armsets),
	# flinucb_regrets[1][i]/float(num_armsets),
	# flinucbmn_regrets[1][i]/float(num_armsets),
	# flinucbdn_regrets[1][i]/float(num_armsets),
	# ucb1_regrets[1][i]/float(num_armsets)))

	#print ("best arm: %d" % best_arm.no)
	#print ("last arm ( LinUCB): %d" % linucb.pulled_arms[-1].no)
	#print ("last arm (fLinUCB): %d" % flinucb.pulled_arms[-1].no)
	#print ("last arm ( UCB): %d" % ucb1.pulled_arms[-1].no)

	#draw_regret_graph(linucb.pulled_arms, best_mu, bound_comparator, 1, 2, "LinUCB")
	#draw_regret_graph(flinucb.pulled_arms, best_mu, bound_comparator, 5, 6, "fLinUCB")
	#draw_regret_graph(ucb1.pulled_arms, best_mu, bound_comparator, 3, 4, "UCB")

	#plt.show()