Skip to content

Instantly share code, notes, and snippets.

View WhatIThinkAbout's full-sized avatar

Steve Roberts WhatIThinkAbout

View GitHub Profile
@WhatIThinkAbout
WhatIThinkAbout / StochasticPowerSocket.py
Last active July 21, 2020 16:46
Stochastic Power Socket
class PowerSocket:
    """Base power socket: a true reward value plus a running estimate of it."""

    def __init__(self, q):
        # true reward value of this socket
        self.q = q
        # start from a freshly reset estimate and try-count
        self.initialize()

    def initialize(self):
        """Reset the reward estimate (Q) and the number of tries (n) to zero."""
        self.Q, self.n = 0, 0
@WhatIThinkAbout
WhatIThinkAbout / Power_Socket_Testing.py
Last active July 21, 2020 17:21
Create 5 power sockets and get charge from each
# fixed ordering used to build the 5 sockets
socket_order = [2, 1, 3, 5, 4]

# build one socket per entry of the order list
# - each socket's mean value is twice its order value plus 2: the doubling
#   gives distinct values and the offset keeps the distribution above zero
sockets = [PowerSocket(2 * order + 2) for order in socket_order]

# remember how many sockets were created
NUM_SOCKETS = len(sockets)
@WhatIThinkAbout
WhatIThinkAbout / Optimistic_Power_Socket.py
Last active April 25, 2020 14:26
Optimistic power socket derived from standard power socket
# Create an Optimistic Power Socket class by inheriting from the standard Power Socket
class OptimisticPowerSocket(PowerSocket):
    """Power socket whose reward estimate starts at a caller-supplied value."""

    def __init__(self, q, initial_estimate):
        """
        q: the true reward value, passed through to the base PowerSocket
        initial_estimate: the value the reward estimate Q starts from
        """
        # store this before the base constructor runs: the base constructor
        # calls initialize(), which reads it
        self.initial_estimate = initial_estimate
        # pass the true reward value to the base PowerSocket
        super().__init__(q)

    def initialize(self):
        """Reset the socket, restoring Q to the supplied initial estimate."""
        super().initialize()
        # the base reset zeroes Q; re-apply the optimistic starting value so
        # it survives every reset, not just construction
        self.Q = self.initial_estimate
@WhatIThinkAbout
WhatIThinkAbout / EpsilonGreedySocketTester.py
Last active July 20, 2020 13:54
Socket tester class for Epsilon-Greedy algorithm
class EpsilonGreedySocketTester(SocketTester):
    """Socket tester that carries the epsilon value for Epsilon-Greedy selection."""

    def __init__(self, epsilon=0.):
        # build the underlying standard socket tester first
        super().__init__()
        # probability of selecting the non-greedy action
        self.epsilon = epsilon
class UCBSocketTester(SocketTester):
    """Socket tester that scores sockets with an upper confidence bound."""

    def __init__(self, socket_order, confidence_level=2.0):
        """
        socket_order: order values the base tester uses to build its sockets
        confidence_level: multiplier applied to the uncertainty term in ucb()
        """
        # pass by keyword: the base tester's first positional parameter is the
        # socket class, not the socket order
        super().__init__(socket_order=socket_order)
        # save the confidence_level
        self.confidence_level = confidence_level

    def ucb(self, Q, t, n):
        """
        Return Q + confidence_level * sqrt(log(t) / n).

        When n == 0 the result is infinity, which also avoids dividing by zero.
        """
        if n == 0:
            return float('inf')
        return Q + self.confidence_level * np.sqrt(np.log(t) / n)
@WhatIThinkAbout
WhatIThinkAbout / GaussianThompsonSocket.py
Last active July 20, 2020 15:48
Thompson Sampling for a socket with a Gaussian output distribution.
class GaussianThompsonSocket( PowerSocket ):
def __init__(self, q):
    # posterior precision and posterior mean, given their starting values
    # here before the base constructor runs
    self.τ_0, self.μ_0 = 0.0001, 1
    # hand the true reward value on to the base PowerSocket
    super().__init__(q)
def sample(self):
@WhatIThinkAbout
WhatIThinkAbout / BernoulliThompsonSocket.py
Last active January 15, 2021 02:59
Thompson sampling on a Bernoulli probabilistic power socket.
class BernoulliThompsonSocket( PowerSocket ):
def __init__(self, q):
    # α counts the times this socket returned a charge, β the times it did
    # not; both counters start at 1
    self.α, self.β = 1, 1
    # hand the true reward value on to the base PowerSocket
    super().__init__(q)
def charge(self):
@WhatIThinkAbout
WhatIThinkAbout / UCBSocket.py
Created July 20, 2020 13:45
Upper Confidence Bound Socket
class UCBSocket(PowerSocket):
    """Power socket that also carries a UCB confidence level."""

    def __init__(self, q, **kwargs):
        """Set up the UCB socket."""
        # exploration is controlled by the confidence level
        # (2.0 unless the caller supplies confidence_level=...)
        self.confidence_level = kwargs.get('confidence_level', 2.0)
        # the true reward value goes to the base PowerSocket
        super().__init__(q)
@WhatIThinkAbout
WhatIThinkAbout / SocketTester.py
Last active February 17, 2021 10:55
create and test a set of sockets over a single test run
class SocketTester():
""" create and test a set of sockets over a single test run """
def __init__(self, socket=PowerSocket, socket_order=socket_order, **kwargs):
    # build one socket of the supplied type per order value; each is given
    # the mean value 2*order + 2 and any extra keyword arguments
    self.sockets = [socket(2 * order + 2, **kwargs) for order in socket_order]
def charge_and_update(self,socket_index):
""" charge from the chosen socket and update its mean reward value """
reward = self.sockets[socket_index].charge()
@WhatIThinkAbout
WhatIThinkAbout / random_argmax.py
Created July 22, 2020 16:38
a random tie-breaking argmax
# return the index of the largest value in the supplied list
# - select uniformly at random between the largest values in the case of a tie
#   (the standard np.argmax just chooses the first value in the case of a tie)
def random_argmax(value_list):
    """Return the index of a maximum of value_list, breaking ties at random.

    value_list: array-like of numbers; must not be empty (the max of an
    empty array raises ValueError).
    Returns a flat index into the values.
    """
    values = np.asarray(value_list)
    # flat indices of every entry equal to the maximum
    max_indices = np.flatnonzero(values == values.max())
    # sample the index directly from the tied maxima, so the result never
    # depends on a random draw being non-zero and can only be a maximal entry
    return np.random.choice(max_indices)