"""Defining and solving a small POMDP using pomdp-py.

Example of defining a small, tabular POMDP and solving
it using Cassandra's pomdp-solve value iteration solver.

Refer to the pomdp-py documentation on external solvers for details.
"""
import pomdp_py
def cryingbaby():
    """Build the tabular "crying baby" POMDP.

    This is a POMDP defined in the Algorithms for Decision Making book
    by M. J. Kochenderfer et al. in section F.7.

    Returns:
        A 7-tuple ``(S, A, Z, T, O, R, gamma)``: the lists of state,
        action and observation names, the tabular transition,
        observation and reward models built from the tables below,
        and the discount factor.
    """
    S = ['hungry', 'sated']
    A = ['feed', 'sing', 'ignore']
    Z = ['crying', 'quiet']

    T = pomdp_py.TabularTransitionModel({
        # (state, action, next state) -> probability
        ('hungry', 'feed', 'sated'): 1.0,
        ('hungry', 'feed', 'hungry'): 0.0,

        ('hungry', 'sing', 'hungry'): 1.0,
        ('hungry', 'sing', 'sated'): 0.0,

        ('hungry', 'ignore', 'hungry'): 1.0,
        ('hungry', 'ignore', 'sated'): 0.0,

        ('sated', 'feed', 'sated'): 1.0,
        ('sated', 'feed', 'hungry'): 0.0,

        ('sated', 'sing', 'hungry'): 0.1,
        ('sated', 'sing', 'sated'): 0.9,

        ('sated', 'ignore', 'hungry'): 0.1,
        ('sated', 'ignore', 'sated'): 0.9,
    })

    O = pomdp_py.TabularObservationModel({
        # (state, action, observation) -> probability
        ('hungry', 'feed', 'crying'): 0.8,
        ('hungry', 'feed', 'quiet'): 0.2,

        ('hungry', 'sing', 'crying'): 0.9,
        ('hungry', 'sing', 'quiet'): 0.1,

        ('hungry', 'ignore', 'crying'): 0.8,
        ('hungry', 'ignore', 'quiet'): 0.2,

        ('sated', 'feed', 'crying'): 0.1,
        ('sated', 'feed', 'quiet'): 0.9,

        ('sated', 'sing', 'crying'): 0.1,
        ('sated', 'sing', 'quiet'): 0.9,

        ('sated', 'ignore', 'crying'): 0.1,
        ('sated', 'ignore', 'quiet'): 0.9,
    })

    R = pomdp_py.TabularRewardModel({
        # (state, action) -> reward
        # Every 'hungry' row carries a -10 term; on top of that 'feed'
        # contributes -5 and 'sing' contributes -0.5 in either state.
        ('hungry', 'feed'): -10 - 5,
        ('hungry', 'sing'): -10 - 0.5,
        ('hungry', 'ignore'): -10,

        ('sated', 'feed'): -5,
        ('sated', 'sing'): -0.5,
        ('sated', 'ignore'): 0,
    })

    gamma = 0.9
    return S, A, Z, T, O, R, gamma
if __name__ == "__main__":
    # Build the problem and wrap it in an agent that starts from belief b0.
    S, A, Z, T, O, R, gamma = cryingbaby()
    pi = pomdp_py.UniformPolicyModel(A)
    b0 = pomdp_py.Histogram({"hungry": 0.22,
                             "sated": 0.78})
    agent = pomdp_py.Agent(b0, pi, T, O, R)

    horizon = 5
    filename = "cryingbaby.POMDP"
    pomdp_py.to_pomdp_file(agent, filename, discount_factor=gamma)

    # path to the pomdp-solve binary
    pomdp_solve_path = "/home/kaiyuzh/software/pomdp-solve-5.4/src/pomdp-solve"
    # Pass the discount factor explicitly so the solver uses the same gamma
    # as the exported file and the value computation below, instead of
    # vi_pruning's own default.
    policy = pomdp_py.vi_pruning(agent, pomdp_solve_path,
                                 discount_factor=gamma,
                                 options=["-horizon", horizon])

    # Value of the initial belief over the same horizon given to the solver.
    print(pomdp_py.value(agent.belief, S, A, Z, T, O, R, gamma, horizon=horizon))

    state = "hungry"  # true initial state
    for step in range(10):
        action = policy.plan(agent)
        next_state = T.sample(state, action)
        reward = R.sample(state, action, next_state)
        observation = O.sample(next_state, action)

        # print (the belief shown is the one the action was planned from)
        print(f"step = {step+1}"
              f"\t|\taction: {action}"
              f"\t|\tobservation: {observation}"
              f"\t|\tstate: {state} "
              f"\t|\treward: {reward}"
              f"\t|\tbelief: {agent.belief}")

        # update agent belief; it must be stored back on the agent,
        # otherwise every later plan() call still sees the initial belief
        next_belief = pomdp_py.belief_update(agent.belief, action, observation, T, O)
        agent.set_belief(next_belief)

        # apply state transition to the environment
        state = next_state
