Skip to content

Instantly share code, notes, and snippets.

@serser
Created December 12, 2017 03:50
Show Gist options
  • Save serser/5efa25f8bef0c64cefeae9835495dc97 to your computer and use it in GitHub Desktop.
Save serser/5efa25f8bef0c64cefeae9835495dc97 to your computer and use it in GitHub Desktop.
# ----------
# User Instructions:
#
# Implement the function optimum_policy2D below.
#
# You are given a car in grid with initial state
# init. Your task is to compute and return the car's
# optimal path to the position specified in goal;
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making
# a left turn, and decreasing the index corresponds to making
# a right turn.
# Motion model: the index into `forward` is the car's heading.
# Adding 1 to the index (mod 4) is a left turn; subtracting 1 is a right turn.
forward = [
    [-1, 0],  # go up
    [0, -1],  # go left
    [1, 0],   # go down
    [0, 1],   # go right
]
forward_name = ['up', 'left', 'down', 'right']

# The three steering actions and their one-character path labels.
action = [-1, 0, 1]            # right turn, no turn, left turn
action_name = ['R', '#', 'L']
# EXAMPLE INPUTS:
# grid format:
# 0 = navigable space
# 1 = unnavigable space
# The world the car drives in: 0 = navigable cell, 1 = obstacle.
grid = [
    [1, 1, 1, 0, 0, 0],
    [1, 1, 1, 0, 1, 0],
    [0, 0, 0, 0, 0, 0],
    [1, 1, 1, 0, 1, 1],
    [1, 1, 1, 0, 1, 1],
]

# Start state [row, col, direction]; direction indexes `forward`
# (0 = up, 1 = left, 2 = down, 3 = right).
init = [4, 3, 0]

# Goal position [row, col]; any final heading is acceptable.
goal = [2, 0]

# Per-action costs: [right turn, no turn, left turn].
cost = [2, 1, 20]
# grid = [[0, 0, 0, 0, 1, 1],
# [0, 0, 1, 0, 0, 0],
# [0, 0, 0, 0, 1, 0],
# [0, 0, 1, 1, 1, 0],
# [0, 0, 0, 0, 1, 0]]
# init = [4, 5, 0]
# goal = [4, 3]
# cost = [1, 1, 1]
# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return
# [[' ', ' ', ' ', 'R', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', '#'],
# ['*', '#', '#', '#', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', ' '],
# [' ', ' ', ' ', '#', ' ', ' ']]
# ----------
# ----------------------------------------
# modify code below
# ----------------------------------------
def optimum_policy2D(grid, init, goal, cost,
                     forward=None, action=None, action_name=None):
    """Compute the optimal driving policy for a car with a heading.

    Runs dynamic-programming value iteration over the 3-D state space
    (row, col, heading), then drives the resulting policy forward from
    ``init`` to project the optimal path onto a 2-D grid of labels.

    Parameters:
        grid: 2-D list of int; 0 = navigable cell, 1 = obstacle.
        init: [row, col, direction] start state; ``direction`` indexes
            ``forward`` (0 = up, 1 = left, 2 = down, 3 = right).
        goal: [row, col] goal position (any final heading is accepted).
        cost: [right-turn, no-turn, left-turn] cost of each action.
        forward, action, action_name: optional motion-model overrides;
            they default to the standard model used by this exercise, so
            existing four-argument callers behave exactly as before.

    Returns:
        A 2-D list of str with 'R'/'#'/'L' along the optimal path, '*'
        at the goal and ' ' elsewhere, or the string 'fail' when the
        goal is unreachable from ``init``.
    """
    # Literal defaults keep the function self-contained (and avoid the
    # mutable-default-argument pitfall).
    if forward is None:
        forward = [[-1, 0], [0, -1], [1, 0], [0, 1]]
    if action is None:
        action = [-1, 0, 1]
    if action_name is None:
        action_name = ['R', '#', 'L']

    rows, cols = len(grid), len(grid[0])
    # value[o][x][y] = minimum cost-to-go from (x, y) while facing o.
    # 999 doubles as the "unreachable" sentinel, as in the exercise.
    value = [[[999 for _ in range(cols)] for _ in range(rows)]
             for _ in range(4)]
    # policy[o][x][y] = best action label to take in that 3-D state.
    policy = [[[' ' for _ in range(cols)] for _ in range(rows)]
              for _ in range(4)]
    policy2D = [[' ' for _ in range(cols)] for _ in range(rows)]

    # Value iteration: sweep all states until none improves.  This
    # replaces the previous hand-rolled priority-queue search, which
    # popped stale open-list entries without a closed check and could
    # overwrite an already-settled predecessor, corrupting backtracking.
    change = True
    while change:
        change = False
        for x in range(rows):
            for y in range(cols):
                for o in range(4):
                    if [x, y] == goal:
                        # Reaching the goal costs nothing, any heading.
                        if value[o][x][y] > 0:
                            value[o][x][y] = 0
                            policy[o][x][y] = '*'
                            change = True
                    elif grid[x][y] == 0:
                        # The car turns first, then moves one cell in
                        # its new heading.
                        for i in range(len(action)):
                            o2 = (o + action[i]) % 4
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]
                            if (0 <= x2 < rows and 0 <= y2 < cols
                                    and grid[x2][y2] == 0):
                                v2 = value[o2][x2][y2] + cost[i]
                                if v2 < value[o][x][y]:
                                    value[o][x][y] = v2
                                    policy[o][x][y] = action_name[i]
                                    change = True

    # Project the 3-D policy onto 2-D by simulating the drive from init.
    x, y, o = init
    if value[o][x][y] >= 999:
        return 'fail'  # goal not reachable from the start state
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        if policy[o][x][y] == '#':
            o2 = o               # keep heading
        elif policy[o][x][y] == 'R':
            o2 = (o - 1) % 4     # right turn decreases the index
        else:                    # 'L'
            o2 = (o + 1) % 4     # left turn increases the index
        x += forward[o2][0]
        y += forward[o2][1]
        o = o2
        policy2D[x][y] = policy[o][x][y]
    return policy2D
# Print the optimal 2-D policy row by row.  Using print() as a function
# keeps this single-argument call valid on both Python 2 and Python 3
# (the old `print r` statement is a SyntaxError on Python 3).
for row in optimum_policy2D(grid, init, goal, cost):
    print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment