Created
December 13, 2017 08:53
-
-
Save serser/23ce9fb4fadbcaa6f2c388632ab90f23 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://classroom.udacity.com/courses/cs373/lessons/48532756/concepts/487024740923 | |
# -------------- | |
# USER INSTRUCTIONS | |
# | |
# Write a function called stochastic_value that | |
# returns two grids. The first grid, value, should | |
# contain the computed value of each cell as shown | |
# in the video. The second grid, policy, should | |
# contain the optimum policy for each cell. | |
# | |
# -------------- | |
# GRADING NOTES | |
# | |
# We will be calling your stochastic_value function | |
# with several different grids and different values | |
# of success_prob, collision_cost, and cost_step. | |
# In order to be marked correct, your function must | |
# RETURN (it does not have to print) two grids, | |
# value and policy. | |
# | |
# When grading your value grid, we will compare the | |
# value of each cell with the true value according | |
# to this model. If your answer for each cell | |
# is sufficiently close to the correct answer | |
# (within 0.001), you will be marked as correct. | |
delta = [[-1, 0 ], # go up | |
[ 0, -1], # go left | |
[ 1, 0 ], # go down | |
[ 0, 1 ]] # go right | |
delta_name = ['^', '<', 'v', '>'] # Use these when creating your policy grid. | |
# --------------------------------------------- | |
# Modify the function stochastic_value below | |
# --------------------------------------------- | |
def is_blocked(x,y, grid): | |
return (x==-1) or (y==-1) or (x == len(grid)) or (y == len(grid[0])) or (grid[x][y]==1) | |
def stochastic_value(grid,goal,cost_step,collision_cost,success_prob): | |
failure_prob = (1.0 - success_prob)/2.0 # Probability(stepping left) = prob(stepping right) = failure_prob | |
value = [[collision_cost for col in range(len(grid[0]))] for row in range(len(grid))] | |
policy = [[' ' for col in range(len(grid[0]))] for row in range(len(grid))] | |
change = True | |
gx,gy = goal | |
value[gx][gy] = 0 | |
policy[gx][gy] = '*' | |
while change: | |
change = False | |
for row in range(len(grid)): | |
for col in range(len(grid[0])): | |
for i,[dx,dy] in enumerate(delta): | |
nx = row+dx | |
ny = col+dy | |
nv = cost_step | |
if (-1<=nx<= len(grid)) and (-1<=ny<=len(grid[0])) and (grid[row][col]!=1) : | |
if is_blocked(nx,ny,grid): | |
# yep, successfully run into collision | |
nv += collision_cost*success_prob | |
else: | |
nv += value[nx][ny]*success_prob | |
if i%2==0: #up/down | |
dlx,dly = delta[1] | |
if is_blocked(row+dlx, col+dly, grid): | |
nv += collision_cost * failure_prob | |
else: | |
nv += value[row+dlx][col+dly] * failure_prob | |
drx,dry = delta[3] | |
if is_blocked(row+drx, col+dry, grid): | |
nv += collision_cost * failure_prob | |
else: | |
nv += value[row+drx][col+dry] * failure_prob | |
else: # left/right | |
dlx,dly = delta[0] | |
if is_blocked(row+dlx, col+dly, grid): | |
nv += collision_cost * failure_prob | |
else: | |
nv += value[row+dlx][col+dly] * failure_prob | |
drx,dry = delta[2] | |
if is_blocked(row+drx, col+dry, grid): | |
nv += collision_cost * failure_prob | |
else: | |
nv += value[row+drx][col+dry] * failure_prob | |
if nv < value[row][col]: | |
value[row][col] = nv | |
policy[row][col] = delta_name[i] | |
change = True | |
return value, policy | |
# --------------------------------------------- | |
# Use the code below to test your solution | |
# --------------------------------------------- | |
grid = [[0, 0, 0, 0], | |
[0, 0, 0, 0], | |
[0, 0, 0, 0], | |
[0, 1, 1, 0]] | |
goal = [0, len(grid[0])-1] # Goal is in top right corner | |
cost_step = 1 | |
collision_cost = 100 | |
success_prob = 0.5 | |
value,policy = stochastic_value(grid,goal,cost_step,collision_cost,success_prob) | |
for row in value: | |
print row | |
for row in policy: | |
print row | |
# Expected outputs: | |
# | |
# [57.9029, 40.2784, 26.0665, 0.0000] | |
# [47.0547, 36.5722, 29.9937, 27.2698] | |
# [53.1715, 42.0228, 37.7755, 45.0916] | |
# [77.5858, 100.00, 100.00, 73.5458] | |
# | |
# ['>', 'v', 'v', '*'] | |
# ['>', '>', '^', '<'] | |
# ['>', '^', '^', '<'] | |
# ['^', ' ', ' ', '^'] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment