Skip to content

Instantly share code, notes, and snippets.

@mkowoods
Created April 22, 2015 19:36
Show Gist options
  • Save mkowoods/1e4aea514d4bb916db94 to your computer and use it in GitHub Desktop.
Save mkowoods/1e4aea514d4bb916db94 to your computer and use it in GitHub Desktop.
Q Learning Sketches
#-------------------------------------------------------------------------------
# Name: module2
# Purpose:
#
# Author: mwoods
#
# Created: 22/04/2015
# Copyright: (c) mwoods 2015
# Licence: <your licence>
#-------------------------------------------------------------------------------
# Q-learning based on examples: tabular, model-free updates applied to a
# fixed, ordered list of observed transitions.
states = ['A', 'B']
actions = ['Up', 'Down']

# Every (state, action) pair starts with a Q-value of 0.
qvals = {(s, a): 0 for s in states for a in actions}

# Ordered list of observations; each row is
# [state, action, next_state, reward].
data = [
    ['A', 'Down', 'B', 2],
    ['B', 'Down', 'B', -4],
    ['B', 'Up', 'B', 0],
    ['B', 'Up', 'A', 3],
    ['A', 'Up', 'A', 1],
]

gamma = 0.5  # weight applied to the best Q-value of the next state
alpha = 0.5  # blend factor between the old estimate and the new sample

for st, at, stplus1, rt in data:
    # Best Q-value currently recorded for the state we landed in.
    best_next = max(qvals[(stplus1, a)] for a in actions)
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    qvals[(st, at)] = (1 - alpha) * qvals[(st, at)] + alpha * (rt + gamma * best_next)
# Q-value iteration with estimates for T and R: repeatedly recompute every
# Q(s, a) from a tabulated transition/reward model instead of from samples.
states = ['A', 'B']
actions = ['Up', 'Down']

# Every (state, action) pair starts with a Q-value of 0.
qvals = {(s, a): 0 for s in states for a in actions}

# Each row is [state, action, next_state, T(s, a, s'), R(s, a, s')].
data = [
    ['A', 'Up', 'A', 1.0, 10.0],
    ['A', 'Down', 'A', 0.5, 2.0],
    ['A', 'Down', 'B', 0.5, 2.0],
    ['B', 'Up', 'A', 1.0, -5.0],
    ['B', 'Down', 'B', 1.0, 8.0],
]

gamma = 0.25  # weight applied to the best Q-value of the next state
alpha = 0.25  # not used by the sweeps below; kept as a module-level name

# (state, action, next_state) -> (transition estimate, reward estimate)
trans_dict = {}
for st, at, stplus1, t_st_at_stplus1, r_st_at_stplus1 in data:
    trans_dict[(st, at, stplus1)] = (t_st_at_stplus1, r_st_at_stplus1)

for n in range(10):
    # Snapshot so every update in this sweep reads the previous sweep's values.
    old_qvals = qvals.copy()
    # Best previous-sweep Q-value per state; identical for every (s1, a1),
    # so compute it once per sweep.
    best_old = {s2: max(old_qvals[(s2, a2)] for a2 in actions) for s2 in states}
    for s1 in states:
        for a1 in actions:
            total = 0
            for s2 in states:
                # Transitions missing from the table contribute nothing.
                t, r = trans_dict.get((s1, a1, s2), (0.0, 0.0))
                total += t * (r + gamma * best_old[s2])
            qvals[(s1, a1)] = total
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this print ran once per sweep or once after the last sweep;
    # per-sweep placement is assumed here -- confirm.
    # Parenthesised form works on both Python 2 and Python 3.
    print(qvals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment