Created
September 3, 2019 02:42
-
-
Save tocom242242/b70b792d804e199b966c0ba9fcee0c94 to your computer and use it in GitHub Desktop.
grid world
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class GridWorld: | |
def __init__(self): | |
self.filed_type = { | |
"N": 0, # 通常 | |
"G": 1, # ゴール | |
"W": 2, # 壁 | |
"T": 3, # トラップ | |
} | |
self.actions = { | |
"UP": 0, | |
"DOWN": 1, | |
"LEFT": 2, | |
"RIGHT": 3 | |
} | |
self.map = [[3, 2, 0, 1], | |
[0, 0, 0, 2], | |
[0, 0, 2, 0], | |
[2, 0, 2, 0], | |
[0, 0, 0, 0]] | |
self.start_pos = 0, 4 # エージェントのスタート地点(x, y) | |
self.agent_pos = copy.deepcopy(self.start_pos) # エージェントがいる地点 | |
def step(self, action): | |
""" | |
行動の実行 | |
状態, 報酬、ゴールしたかを返却 | |
""" | |
to_x, to_y = copy.deepcopy(self.agent_pos) | |
# 移動可能かどうかの確認。移動不可能であれば、ポジションはそのままにマイナス報酬 | |
if self._is_possible_action(to_x, to_y, action) == False: | |
return self.agent_pos, -1, False | |
if action == self.actions["UP"]: | |
to_y += -1 | |
elif action == self.actions["DOWN"]: | |
to_y += 1 | |
elif action == self.actions["LEFT"]: | |
to_x += -1 | |
elif action == self.actions["RIGHT"]: | |
to_x += 1 | |
is_goal = self._is_end_episode(to_x, to_y) # エピソードの終了の確認 | |
reward = self._compute_reward(to_x, to_y) | |
self.agent_pos = to_x, to_y | |
return self.agent_pos, reward, is_goal | |
def _is_end_episode(self, x, y): | |
""" | |
x, yがエピソードの終了かの確認。 | |
""" | |
if self.map[y][x] == self.filed_type["G"]: # ゴール | |
return True | |
elif self.map[y][x] == self.filed_type["T"]: # トラップ | |
return True | |
else: | |
return False | |
def _is_wall(self, x, y): | |
""" | |
x, yが壁かどうかの確認 | |
""" | |
if self.map[y][x] == self.filed_type["W"]: | |
return True | |
else: | |
return False | |
def _is_possible_action(self, x, y, action): | |
""" | |
実行可能な行動かどうかの判定 | |
""" | |
to_x = x | |
to_y = y | |
if action == self.actions["UP"]: | |
to_y += -1 | |
elif action == self.actions["DOWN"]: | |
to_y += 1 | |
elif action == self.actions["LEFT"]: | |
to_x += -1 | |
elif action == self.actions["RIGHT"]: | |
to_x += 1 | |
if len(self.map) <= to_y or 0 > to_y: | |
return False | |
elif len(self.map[0]) <= to_x or 0 > to_x: | |
return False | |
elif self._is_wall(to_x, to_y): | |
return False | |
return True | |
def _compute_reward(self, x, y): | |
if self.map[y][x] == self.filed_type["N"]: | |
return 0 | |
elif self.map[y][x] == self.filed_type["G"]: | |
return 100 | |
elif self.map[y][x] == self.filed_type["T"]: | |
return -100 | |
def reset(self): | |
self.agent_pos = self.start_pos | |
return self.start_pos |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment