Created
April 22, 2015 19:36
-
-
Save mkowoods/1e4aea514d4bb916db94 to your computer and use it in GitHub Desktop.
Q Learning Sketches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------- | |
# Name: module2 | |
# Purpose: | |
# | |
# Author: mwoods | |
# | |
# Created: 22/04/2015 | |
# Copyright: (c) mwoods 2015 | |
# Licence: <your licence> | |
#------------------------------------------------------------------------------- | |
# Q-learning based on examples.
#
# Every Q(s, a) starts at 0. Each observed sample (s, a, s', r) is then applied
# exactly once, in the order listed, using the update
#     Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
states = ['A', 'B']
actions = ['Up', 'Down']

# Q-table keyed by (state, action); all entries start at 0.
qvals = {}
for s in states:
    for a in actions:
        qvals.setdefault((s, a), 0)

# Ordered list of observations: [state, action, next state, reward].
data = [['A', 'Down', 'B', 2],
        ['B', 'Down', 'B', -4],
        ['B', 'Up', 'B', 0],
        ['B', 'Up', 'A', 3],
        ['A', 'Up', 'A', 1]]

gamma = 0.5  # weight applied to the best Q-value of the next state
alpha = 0.5  # weight of the new sample against the previous estimate

for st, at, stplus1, rt in data:
    # Best value currently reachable from the next state. It is read from the
    # live table, so earlier samples influence later updates.
    best_next = max(qvals[(stplus1, a)] for a in actions)
    sample = rt + gamma * best_next
    qvals[(st, at)] = (1 - alpha) * qvals[(st, at)] + alpha * sample
# Q-value iteration with estimates for T (transition probability) and
# R (reward).
#
# Each sweep recomputes every Q(s, a) from the previous sweep's table:
#     Q(s, a) <- sum_s' T(s, a, s') * (R(s, a, s') + gamma * max_a' Q(s', a'))
states = ['A', 'B']
actions = ['Up', 'Down']

# Q-table keyed by (state, action); all entries start at 0.
qvals = {}
for s in states:
    for a in actions:
        qvals.setdefault((s, a), 0)

# Model rows: [state, action, next state, T(s, a, s'), R(s, a, s')].
data = [['A', 'Up', 'A', 1.0, 10.0],
        ['A', 'Down', 'A', 0.5, 2.0],
        ['A', 'Down', 'B', 0.5, 2.0],
        ['B', 'Up', 'A', 1.0, -5.0],
        ['B', 'Down', 'B', 1.0, 8.0]]

gamma = 0.25
alpha = 0.25  # kept from the original sketch; not used by the update below

# (state, action, next state) -> (transition probability, reward).
trans_dict = {}
for line in data:
    st, at, stplus1, t_st_at_stplus1, r_st_at_stplus1 = line
    trans_dict[(st, at, stplus1)] = (t_st_at_stplus1, r_st_at_stplus1)

for n in range(10):
    # Snapshot the table so every update in this sweep reads the values from
    # the previous sweep rather than values written earlier in the same sweep.
    old_qvals = qvals.copy()
    for s1 in states:
        for a1 in actions:
            total = 0
            for s2 in states:
                # Transitions missing from the model contribute nothing:
                # they default to probability 0.0 and reward 0.0.
                prob, reward = trans_dict.get((s1, a1, s2), (0.0, 0.0))
                best_next = max(old_qvals[(s2, a2)] for a2 in actions)
                total += prob * (reward + gamma * best_next)
            qvals[(s1, a1)] = total

# Parenthesised form prints the same dict under both Python 2 and Python 3.
print(qvals)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment