Skip to content

Instantly share code, notes, and snippets.

@thundergolfer
Last active May 3, 2018 00:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thundergolfer/c4d4926a1b902fe31617302ff94e2ee5 to your computer and use it in GitHub Desktop.
Save thundergolfer/c4d4926a1b902fe31617302ff94e2ee5 to your computer and use it in GitHub Desktop.
## USAGE
##
## Step 1: Clone the aimacode Python repository (aima-python) from GitHub
## Step 2: Place this script in the root directory of that repo
## Step 3: Run `python3 tute_8_value_iteration.py` (Python 3.4+ is required: the script uses `max(..., default=...)`)
import types
def value_iteration_stepper(mdp, epsilon=0.001):
    """Step through value iteration one sweep at a time. [Figure 17.4]

    Each ``next()`` performs one full Bellman update over ``mdp.states`` and
    yields the resulting utility estimates, so intermediate iterations can be
    inspected individually.

    Args:
        mdp: Object exposing ``states`` (iterable of states), ``R(s)``
            (reward of a state), ``T(s, a)`` (iterable of
            ``(probability, next_state)`` pairs), ``actions(s)`` (iterable of
            actions available in ``s``) and ``gamma`` (discount factor).
        epsilon: Kept for signature compatibility. It is not used: this
            generator performs no convergence test and never stops by
            itself, so the caller decides how many sweeps to take.

    Yields:
        dict: state -> utility after the sweep. Every yielded dict is an
        independent snapshot, so results from earlier sweeps stay valid
        after the generator is advanced.
    """
    U1 = {s: 0 for s in mdp.states}
    R, T, gamma = mdp.R, mdp.T, mdp.gamma
    while True:
        # Freeze the previous sweep so every update below reads old values.
        U = U1.copy()
        for s in mdp.states:
            # default=0 covers states with no actions (e.g. terminals):
            # their utility is just their reward.
            U1[s] = R(s) + gamma * max(
                (sum(p * U[s1] for (p, s1) in T(s, a)) for a in mdp.actions(s)),
                default=0,
            )
        # Hand out a copy: U1 is mutated in place by the next sweep, which
        # would otherwise overwrite the result the caller is holding.
        yield U1.copy()
# Actions offered to the agent. 'STAY' has transition entries below but is
# deliberately absent here, so it is never chosen.
act_list = ['N', 'S', 'E', 'W']

# States in which no further action is taken.
terminals = [(1, 1), (1, 3), (3, 1)]

# transitions[state][action] -> list of (probability, next_state) outcomes.
# States read as (row, column): 'S' mostly increases the first coordinate,
# 'E' mostly increases the second.
transitions = {
    (1, 1): {
        'N': [(0.7, (1, 1)), (0.3, (1, 1))],
        'S': [(0.8, (2, 1)), (0.2, (1, 2))],
        'E': [(0.6, (1, 2)), (0.4, (2, 1))],
        'W': [(0.3, (1, 1)), (0.7, (1, 1))],
        'STAY': [(1.0, (1, 1))],
    },
    (1, 2): {
        'N': [(0.7, (1, 2)), (0.3, (1, 1))],
        'S': [(0.8, (2, 2)), (0.2, (1, 3))],
        'E': [(0.6, (1, 3)), (0.4, (2, 2))],
        'W': [(0.7, (1, 1)), (0.3, (1, 2))],
        'STAY': [(1.0, (1, 2))],
    },
    (1, 3): {
        'N': [(0.7, (1, 3)), (0.3, (1, 2))],
        'S': [(0.8, (2, 3)), (0.2, (1, 3))],
        'E': [(0.6, (1, 3)), (0.4, (2, 3))],
        'W': [(0.7, (1, 2)), (0.3, (1, 3))],
        'STAY': [(1.0, (1, 3))],
    },
    (2, 1): {
        'N': [(0.7, (1, 1)), (0.3, (2, 1))],
        'S': [(0.8, (3, 1)), (0.2, (2, 2))],
        'E': [(0.6, (2, 2)), (0.4, (3, 1))],
        'W': [(0.7, (2, 1)), (0.3, (1, 1))],
        'STAY': [(1.0, (2, 1))],
    },
    (2, 2): {
        'N': [(0.7, (1, 2)), (0.3, (2, 1))],
        'S': [(0.8, (3, 2)), (0.2, (2, 3))],
        'E': [(0.6, (2, 3)), (0.4, (3, 2))],
        'W': [(0.7, (2, 1)), (0.3, (1, 2))],
        'STAY': [(1.0, (2, 2))],
    },
    (2, 3): {
        'N': [(0.7, (1, 3)), (0.3, (2, 2))],
        'S': [(0.8, (3, 3)), (0.2, (2, 3))],
        'E': [(0.6, (2, 3)), (0.4, (3, 3))],
        'W': [(0.7, (2, 2)), (0.3, (1, 3))],
        'STAY': [(1.0, (2, 3))],
    },
    (3, 1): {
        'N': [(0.7, (2, 1)), (0.3, (3, 1))],
        'S': [(0.8, (3, 1)), (0.2, (3, 2))],
        'E': [(0.6, (3, 2)), (0.4, (3, 1))],
        'W': [(0.7, (3, 1)), (0.3, (2, 1))],
        'STAY': [(1.0, (3, 1))],
    },
    (3, 2): {
        'N': [(0.7, (2, 2)), (0.3, (3, 1))],
        'S': [(0.8, (3, 2)), (0.2, (3, 3))],
        'E': [(0.6, (3, 3)), (0.4, (3, 2))],
        'W': [(0.7, (3, 1)), (0.3, (2, 2))],
        'STAY': [(1.0, (3, 2))],
    },
    (3, 3): {
        'N': [(0.7, (2, 3)), (0.3, (3, 2))],
        'S': [(0.8, (3, 3)), (0.2, (3, 3))],
        'E': [(0.6, (3, 3)), (0.4, (3, 3))],
        'W': [(0.7, (3, 2)), (0.3, (2, 3))],
        'STAY': [(1.0, (3, 3))],
    },
}
# Reward received in each state, keyed by the same (row, column) tuples
# used in the transition table.
rewards = {
    (1, 1): 20, (1, 2): -1, (1, 3): 5,
    (2, 1): -1, (2, 2): -1, (2, 3): -1,
    (3, 1): -20, (3, 2): -1, (3, 3): -1,
}

# The state space is exactly the set of states that have a reward.
states = list(rewards)

# Discount factor of 1: future rewards are not discounted.
gamma = 1

# Initial state handed to the MDP.
init = (1, 1)
from mdp import MDP
# Bundle the tutorial's tables into an aima-python MDP instance
# (all arguments are passed positionally).
problem = MDP(init, act_list, terminals, transitions, rewards, states, gamma)
# Workaround: aima-python's mdp.py currently has a bug in its actions() method.
def fix_actions(self, state):
    """Return the actions available in ``state``.

    Replacement for the MDP ``actions`` method: terminal states offer no
    actions at all, every other state offers the full action list.

    Args:
        self: The MDP instance; must provide ``terminals`` and ``actlist``.
        state: The state being queried.

    Returns:
        An empty list for a terminal state, otherwise ``self.actlist``
        (the instance's own list, not a copy).
    """
    if state in self.terminals:
        return []
    return self.actlist
# Bind the replacement to this one instance and install it as its `actions`
# attribute; only `problem` is affected.
problem.actions = bound_actions = types.MethodType(fix_actions, problem)
from mdp import value_iteration
# Show the utility estimates after each of the first three sweeps,
# separated by a run of blank lines.
iteration = value_iteration_stepper(problem)
for sweep in range(3):
    if sweep:
        print('\n\n')
    print(next(iteration))
@ssardina
Copy link

Hi @thundergolfer:

I get this:

[ssardina@Thinkpad-X1 aima-python.git]$ python3 tute_08_value_iteration.py 
Traceback (most recent call last):
  File "tute_08_value_iteration.py", line 116, in <module>
    print(next(iteration)) # iteration 1
  File "tute_08_value_iteration.py", line 13, in value_iteration_stepper
    U1[s] = R(s) + gamma * max((sum(p*U[s1] for (p, s1) in T(s, a)) for a in mdp.actions(s)), default=0)
  File "tute_08_value_iteration.py", line 13, in <genexpr>
    U1[s] = R(s) + gamma * max((sum(p*U[s1] for (p, s1) in T(s, a)) for a in mdp.actions(s)), default=0)
  File "/home/ssardina/git/soft/courses/aima-python.git/mdp.py", line 62, in T
    return self.transitions[state][action]
KeyError: None

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment