Skip to content

Instantly share code, notes, and snippets.

@tocom242242
Created September 3, 2019 02:42
Show Gist options
  • Save tocom242242/b70b792d804e199b966c0ba9fcee0c94 to your computer and use it in GitHub Desktop.
Save tocom242242/b70b792d804e199b966c0ba9fcee0c94 to your computer and use it in GitHub Desktop.
grid world
class GridWorld:
def __init__(self):
self.filed_type = {
"N": 0, # 通常
"G": 1, # ゴール
"W": 2, # 壁
"T": 3, # トラップ
}
self.actions = {
"UP": 0,
"DOWN": 1,
"LEFT": 2,
"RIGHT": 3
}
self.map = [[3, 2, 0, 1],
[0, 0, 0, 2],
[0, 0, 2, 0],
[2, 0, 2, 0],
[0, 0, 0, 0]]
self.start_pos = 0, 4 # エージェントのスタート地点(x, y)
self.agent_pos = copy.deepcopy(self.start_pos) # エージェントがいる地点
def step(self, action):
"""
行動の実行
状態, 報酬、ゴールしたかを返却
"""
to_x, to_y = copy.deepcopy(self.agent_pos)
# 移動可能かどうかの確認。移動不可能であれば、ポジションはそのままにマイナス報酬
if self._is_possible_action(to_x, to_y, action) == False:
return self.agent_pos, -1, False
if action == self.actions["UP"]:
to_y += -1
elif action == self.actions["DOWN"]:
to_y += 1
elif action == self.actions["LEFT"]:
to_x += -1
elif action == self.actions["RIGHT"]:
to_x += 1
is_goal = self._is_end_episode(to_x, to_y) # エピソードの終了の確認
reward = self._compute_reward(to_x, to_y)
self.agent_pos = to_x, to_y
return self.agent_pos, reward, is_goal
def _is_end_episode(self, x, y):
"""
x, yがエピソードの終了かの確認。
"""
if self.map[y][x] == self.filed_type["G"]: # ゴール
return True
elif self.map[y][x] == self.filed_type["T"]: # トラップ
return True
else:
return False
def _is_wall(self, x, y):
"""
x, yが壁かどうかの確認
"""
if self.map[y][x] == self.filed_type["W"]:
return True
else:
return False
def _is_possible_action(self, x, y, action):
"""
実行可能な行動かどうかの判定
"""
to_x = x
to_y = y
if action == self.actions["UP"]:
to_y += -1
elif action == self.actions["DOWN"]:
to_y += 1
elif action == self.actions["LEFT"]:
to_x += -1
elif action == self.actions["RIGHT"]:
to_x += 1
if len(self.map) <= to_y or 0 > to_y:
return False
elif len(self.map[0]) <= to_x or 0 > to_x:
return False
elif self._is_wall(to_x, to_y):
return False
return True
def _compute_reward(self, x, y):
if self.map[y][x] == self.filed_type["N"]:
return 0
elif self.map[y][x] == self.filed_type["G"]:
return 100
elif self.map[y][x] == self.filed_type["T"]:
return -100
def reset(self):
self.agent_pos = self.start_pos
return self.start_pos
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment