# ########################################################################### #
# MVA -- Reinforcement Learning -- TP2
# ########################################################################### #
#
# Code base for the TP2 of the MVA lecture Reinforcement Learning, by
# Alessandro Lazaric. This is a Python port of the MATLAB base provided by the
# TP advisor Émilie Kaufmann:
# http://chercheurs.lille.inria.fr/ekaufman/teaching.html
#
# Do not hesitate to report any suggestion or bugfix to
# Élie Michel <elie.michel@ens.fr>
#
# ########################################################################### #
#
# This piece of software is released under the MIT License:
#
# Copyright (c) 2016 Élie Michel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ########################################################################### #
import numpy as np
from numpy import exp, log
from numpy.random import random, beta


class ArmBernoulli:
    """Bernoulli arm"""

    def __init__(self, p):
        """
        p: Bernoulli parameter
        """
        self.p = p
        self.mean = p
        self.var = p * (1 - p)

    def sample(self):
        reward = random() < self.p
        return reward
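

# Minimal usage sketch (illustration only; not part of the original gist):
#
#     arm = ArmBernoulli(0.3)
#     rewards = [arm.sample() for _ in range(1000)]
#     # the empirical mean should be close to arm.mean = 0.3
#     print(np.mean(rewards))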


class ArmBeta:
    """arm having a Beta distribution"""

    def __init__(self, a, b):
        """
        a: first beta parameter
        b: second beta parameter
        """
        self.a = a
        self.b = b
        self.mean = a / (a + b)
        self.var = (a * b) / ((a + b) ** 2 * (a + b + 1))

    def sample(self):
        reward = beta(self.a, self.b)
        return reward


class ArmExp:
    """arm with truncated exponential distribution"""

    def __init__(self, lambd):
        """
        lambd: parameter of the exponential distribution
        """
        self.lambd = lambd
        self.mean = (1 / lambd) * (1 - exp(-lambd))
        self.var = 1  # compute it yourself!

    def sample(self):
        reward = min(-1 / self.lambd * log(random()), 1)
        return reward
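

# Remark on ArmExp.mean (an added note, not part of the original code base):
# sample() draws X ~ Exp(lambd) by inverse-transform sampling and returns
# min(X, 1), so
#     E[min(X, 1)] = integral_0^1 P(X > t) dt
#                  = integral_0^1 exp(-lambd * t) dt
#                  = (1 / lambd) * (1 - exp(-lambd)),
# which is the value assigned to self.mean above. The variance is left as an
# exercise (hence the placeholder self.var = 1).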


def simu(p):
    """
    Draw a sample from a distribution with finite support, taking the value
    k with probability p[k].
    p: a vector (numpy array) of probabilities
    """
    q = p.cumsum()
    u = random()
    i = 0
    while u > q[i]:
        i += 1
        if i >= len(q):
            raise ValueError("p does not sum to 1")
    return i
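

# Side note (an addition, not in the original MATLAB base): for a numpy array
# p, the loop in simu() is an inverse-CDF lookup and could also be written
# with a vectorized search, e.g.
#
#     i = int(np.searchsorted(p.cumsum(), random()))
#
# up to the explicit "does not sum to 1" check. The explicit loop is kept to
# stay close to the original code.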


class ArmFinite:
    """arm with finite support"""

    def __init__(self, X, P):
        """
        X: support of the distribution
        P: associated probabilities
        """
        self.X = np.array(X)
        self.P = np.array(P)
        self.mean = (self.X * self.P).sum()
        self.var = (self.X ** 2 * self.P).sum() - self.mean ** 2

    def sample(self):
        i = simu(self.P)
        reward = self.X[i]
        return reward
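

# Quick self-check of the arm classes (a convenience sketch added here; it was
# not part of the original gist). Running this file directly compares each
# arm's empirical mean over many samples to its theoretical mean.
if __name__ == "__main__":
    arms = [
        ArmBernoulli(0.3),
        ArmBeta(2.0, 5.0),
        ArmExp(2.0),
        ArmFinite([0.0, 0.25, 0.5, 1.0], [0.1, 0.2, 0.3, 0.4]),
    ]
    n_samples = 10000
    for arm in arms:
        empirical_mean = np.mean([arm.sample() for _ in range(n_samples)])
        print("%-13s theoretical mean: %.4f   empirical mean: %.4f"
              % (type(arm).__name__, arm.mean, empirical_mean))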