Skip to content

Instantly share code, notes, and snippets.

@serser
Created December 13, 2017 08:53
Show Gist options
  • Save serser/23ce9fb4fadbcaa6f2c388632ab90f23 to your computer and use it in GitHub Desktop.
Save serser/23ce9fb4fadbcaa6f2c388632ab90f23 to your computer and use it in GitHub Desktop.
# https://classroom.udacity.com/courses/cs373/lessons/48532756/concepts/487024740923
# --------------
# USER INSTRUCTIONS
#
# Write a function called stochastic_value that
# returns two grids. The first grid, value, should
# contain the computed value of each cell as shown
# in the video. The second grid, policy, should
# contain the optimum policy for each cell.
#
# --------------
# GRADING NOTES
#
# We will be calling your stochastic_value function
# with several different grids and different values
# of success_prob, collision_cost, and cost_step.
# In order to be marked correct, your function must
# RETURN (it does not have to print) two grids,
# value and policy.
#
# When grading your value grid, we will compare the
# value of each cell with the true value according
# to this model. If your answer for each cell
# is sufficiently close to the correct answer
# (within 0.001), you will be marked as correct.
delta = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
delta_name = ['^', '<', 'v', '>'] # Use these when creating your policy grid.
# ---------------------------------------------
# Modify the function stochastic_value below
# ---------------------------------------------
def is_blocked(x,y, grid):
return (x==-1) or (y==-1) or (x == len(grid)) or (y == len(grid[0])) or (grid[x][y]==1)
def stochastic_value(grid,goal,cost_step,collision_cost,success_prob):
failure_prob = (1.0 - success_prob)/2.0 # Probability(stepping left) = prob(stepping right) = failure_prob
value = [[collision_cost for col in range(len(grid[0]))] for row in range(len(grid))]
policy = [[' ' for col in range(len(grid[0]))] for row in range(len(grid))]
change = True
gx,gy = goal
value[gx][gy] = 0
policy[gx][gy] = '*'
while change:
change = False
for row in range(len(grid)):
for col in range(len(grid[0])):
for i,[dx,dy] in enumerate(delta):
nx = row+dx
ny = col+dy
nv = cost_step
if (-1<=nx<= len(grid)) and (-1<=ny<=len(grid[0])) and (grid[row][col]!=1) :
if is_blocked(nx,ny,grid):
# yep, successfully run into collision
nv += collision_cost*success_prob
else:
nv += value[nx][ny]*success_prob
if i%2==0: #up/down
dlx,dly = delta[1]
if is_blocked(row+dlx, col+dly, grid):
nv += collision_cost * failure_prob
else:
nv += value[row+dlx][col+dly] * failure_prob
drx,dry = delta[3]
if is_blocked(row+drx, col+dry, grid):
nv += collision_cost * failure_prob
else:
nv += value[row+drx][col+dry] * failure_prob
else: # left/right
dlx,dly = delta[0]
if is_blocked(row+dlx, col+dly, grid):
nv += collision_cost * failure_prob
else:
nv += value[row+dlx][col+dly] * failure_prob
drx,dry = delta[2]
if is_blocked(row+drx, col+dry, grid):
nv += collision_cost * failure_prob
else:
nv += value[row+drx][col+dry] * failure_prob
if nv < value[row][col]:
value[row][col] = nv
policy[row][col] = delta_name[i]
change = True
return value, policy
# ---------------------------------------------
# Use the code below to test your solution
# ---------------------------------------------
grid = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 1, 1, 0]]
goal = [0, len(grid[0])-1] # Goal is in top right corner
cost_step = 1
collision_cost = 100
success_prob = 0.5
value,policy = stochastic_value(grid,goal,cost_step,collision_cost,success_prob)
for row in value:
print row
for row in policy:
print row
# Expected outputs:
#
# [57.9029, 40.2784, 26.0665, 0.0000]
# [47.0547, 36.5722, 29.9937, 27.2698]
# [53.1715, 42.0228, 37.7755, 45.0916]
# [77.5858, 100.00, 100.00, 73.5458]
#
# ['>', 'v', 'v', '*']
# ['>', '>', '^', '<']
# ['>', '^', '^', '<']
# ['^', ' ', ' ', '^']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment