serser/stochastic_motion.py

## stochastic_motion.py
# https://classroom.udacity.com/courses/cs373/lessons/48532756/concepts/487024740923
# --------------
# USER INSTRUCTIONS
#
# Write a function called stochastic_value that
# returns two grids. The first grid, value, should
# contain the computed value of each cell as shown
# in the video. The second grid, policy, should
# contain the optimum policy for each cell.
#
# --------------
# GRADING NOTES
#
# We will be calling your stochastic_value function
# with several different grids and different values
# of success_prob, collision_cost, and cost_step.
# In order to be marked correct, your function must
# RETURN (it does not have to print) two grids,
# value and policy.
#
# When grading your value grid, we will compare the
# value of each cell with the true value according
# to this model. If your answer for each cell
# is sufficiently close to the correct answer
# (within 0.001), you will be marked as correct.

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>'] # Use these when creating your policy grid.

# ---------------------------------------------
#  Modify the function stochastic_value below
# ---------------------------------------------

def is_blocked(x,y, grid):
    return (x==-1) or (y==-1) or (x == len(grid)) or (y == len(grid[0])) or (grid[x][y]==1)

def stochastic_value(grid,goal,cost_step,collision_cost,success_prob):
    failure_prob = (1.0 - success_prob)/2.0 # Probability(stepping left) = prob(stepping right) = failure_prob
    value = [[collision_cost for col in range(len(grid[0]))] for row in range(len(grid))]
    policy = [[' ' for col in range(len(grid[0]))] for row in range(len(grid))]

    change = True

    gx,gy = goal
    value[gx][gy] = 0
    policy[gx][gy] = '*'

    while change:
        change = False
        for row in range(len(grid)):
            for col in range(len(grid[0])):
                for i,[dx,dy] in enumerate(delta):
                    nx = row+dx
                    ny = col+dy
                    nv = cost_step
                    if (-1<=nx<= len(grid)) and (-1<=ny<=len(grid[0])) and (grid[row][col]!=1) :
                        if is_blocked(nx,ny,grid):
                            # yep, successfully run into collision
                            nv += collision_cost*success_prob
                        else:
                            nv += value[nx][ny]*success_prob
                        if i%2==0: #up/down
                            dlx,dly = delta[1]
                            if is_blocked(row+dlx, col+dly, grid):
                                nv += collision_cost * failure_prob
                            else:
                                nv += value[row+dlx][col+dly] * failure_prob

                            drx,dry = delta[3]

                            if is_blocked(row+drx, col+dry, grid):
                                nv += collision_cost * failure_prob
                            else:
                                nv += value[row+drx][col+dry] * failure_prob

                        else: # left/right
                            dlx,dly = delta[0]
                            if is_blocked(row+dlx, col+dly, grid):
                                nv += collision_cost * failure_prob
                            else:
                                nv += value[row+dlx][col+dly] * failure_prob

                            drx,dry = delta[2]

                            if is_blocked(row+drx, col+dry, grid):
                                nv += collision_cost * failure_prob
                            else:
                                nv += value[row+drx][col+dry] * failure_prob
                        if nv < value[row][col]:
                            value[row][col] = nv
                            policy[row][col] = delta_name[i]
                            change = True

    return value, policy

# ---------------------------------------------
#  Use the code below to test your solution
# ---------------------------------------------

grid = [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 1, 0]]
goal = [0, len(grid[0])-1] # Goal is in top right corner
cost_step = 1
collision_cost = 100
success_prob = 0.5

value,policy = stochastic_value(grid,goal,cost_step,collision_cost,success_prob)
for row in value:
    print row
for row in policy:
    print row

# Expected outputs:
#
# [57.9029, 40.2784, 26.0665,  0.0000]
# [47.0547, 36.5722, 29.9937, 27.2698]
# [53.1715, 42.0228, 37.7755, 45.0916]
# [77.5858, 100.00, 100.00, 73.5458]
#
# ['>', 'v', 'v', '*']
# ['>', '>', '^', '<']
# ['>', '^', '^', '<']
# ['^', ' ', ' ', '^']
	# https://classroom.udacity.com/courses/cs373/lessons/48532756/concepts/487024740923
	# --------------
	# USER INSTRUCTIONS
	#
	# Write a function called stochastic_value that
	# returns two grids. The first grid, value, should
	# contain the computed value of each cell as shown
	# in the video. The second grid, policy, should
	# contain the optimum policy for each cell.
	#
	# --------------
	# GRADING NOTES
	#
	# We will be calling your stochastic_value function
	# with several different grids and different values
	# of success_prob, collision_cost, and cost_step.
	# In order to be marked correct, your function must
	# RETURN (it does not have to print) two grids,
	# value and policy.
	#
	# When grading your value grid, we will compare the
	# value of each cell with the true value according
	# to this model. If your answer for each cell
	# is sufficiently close to the correct answer
	# (within 0.001), you will be marked as correct.

	delta = [[-1, 0 ], # go up
	[ 0, -1], # go left
	[ 1, 0 ], # go down
	[ 0, 1 ]] # go right

	delta_name = ['^', '<', 'v', '>'] # Use these when creating your policy grid.

	# ---------------------------------------------
	# Modify the function stochastic_value below
	# ---------------------------------------------

	def is_blocked(x,y, grid):
	return (x==-1) or (y==-1) or (x == len(grid)) or (y == len(grid[0])) or (grid[x][y]==1)

	def stochastic_value(grid,goal,cost_step,collision_cost,success_prob):
	failure_prob = (1.0 - success_prob)/2.0 # Probability(stepping left) = prob(stepping right) = failure_prob
	value = [[collision_cost for col in range(len(grid[0]))] for row in range(len(grid))]
	policy = [[' ' for col in range(len(grid[0]))] for row in range(len(grid))]

	change = True

	gx,gy = goal
	value[gx][gy] = 0
	policy[gx][gy] = '*'

	while change:
	change = False
	for row in range(len(grid)):
	for col in range(len(grid[0])):
	for i,[dx,dy] in enumerate(delta):
	nx = row+dx
	ny = col+dy
	nv = cost_step
	if (-1<=nx<= len(grid)) and (-1<=ny<=len(grid[0])) and (grid[row][col]!=1) :
	if is_blocked(nx,ny,grid):
	# yep, successfully run into collision
	nv += collision_cost*success_prob
	else:
	nv += value[nx][ny]*success_prob
	if i%2==0: #up/down
	dlx,dly = delta[1]
	if is_blocked(row+dlx, col+dly, grid):
	nv += collision_cost * failure_prob
	else:
	nv += value[row+dlx][col+dly] * failure_prob

	drx,dry = delta[3]

	if is_blocked(row+drx, col+dry, grid):
	nv += collision_cost * failure_prob
	else:
	nv += value[row+drx][col+dry] * failure_prob

	else: # left/right
	dlx,dly = delta[0]
	if is_blocked(row+dlx, col+dly, grid):
	nv += collision_cost * failure_prob
	else:
	nv += value[row+dlx][col+dly] * failure_prob

	drx,dry = delta[2]

	if is_blocked(row+drx, col+dry, grid):
	nv += collision_cost * failure_prob
	else:
	nv += value[row+drx][col+dry] * failure_prob
	if nv < value[row][col]:
	value[row][col] = nv
	policy[row][col] = delta_name[i]
	change = True

	return value, policy

	# ---------------------------------------------
	# Use the code below to test your solution
	# ---------------------------------------------

	grid = [[0, 0, 0, 0],
	[0, 0, 0, 0],
	[0, 0, 0, 0],
	[0, 1, 1, 0]]
	goal = [0, len(grid[0])-1] # Goal is in top right corner
	cost_step = 1
	collision_cost = 100
	success_prob = 0.5

	value,policy = stochastic_value(grid,goal,cost_step,collision_cost,success_prob)
	for row in value:
	print row
	for row in policy:
	print row

	# Expected outputs:
	#
	# [57.9029, 40.2784, 26.0665, 0.0000]
	# [47.0547, 36.5722, 29.9937, 27.2698]
	# [53.1715, 42.0228, 37.7755, 45.0916]
	# [77.5858, 100.00, 100.00, 73.5458]
	#
	# ['>', 'v', 'v', '*']
	# ['>', '>', '^', '<']
	# ['>', '^', '^', '<']
	# ['^', ' ', ' ', '^']