Skip to content

Instantly share code, notes, and snippets.

@taotao54321
Created November 7, 2016 11:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taotao54321/3be8abcaf3937e2c66eeef669556b4ef to your computer and use it in GitHub Desktop.
Save taotao54321/3be8abcaf3937e2c66eeef669556b4ef to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Just a handmade algorithm. NOT AI.
import gym
# Policy table for the 4x4 FrozenLake grid, indexed by the observation
# (cell number, counted row by row from the top-left corner).
#
# Grid layout (presumably S = start, F = frozen, H = hole, G = goal):
#
#     S F F F
#     F H F H
#     F F F H
#     H F F G
#
# Action codes: 0 = left, 1 = down, 2 = right, 3 = up.
# The H and G cells hold -1; get_action() asserts that such an entry is
# never selected.
ACTIONS = (
    0, 3, 3, 3,     # row 0: S F F F
    0, -1, 0, -1,   # row 1: F H F H
    3, 1, 0, -1,    # row 2: F F F H
    -1, 2, 1, -1,   # row 3: H F F G
)

# Number of episodes played by main().
EPISODE_COUNT = 1000

# Set to True to render the environment and print every step result.
DEBUG = False
def get_action(ob):
    """Return the hard-coded action for observation *ob*.

    The action is read from the ACTIONS table at index *ob*. An entry
    outside the four valid action codes (0..3) trips the assertion, which
    catches being asked to act on a cell that has no move assigned (-1).
    """
    move = ACTIONS[ob]
    # Table entries are ints, so membership is equivalent to a range check.
    assert move in (0, 1, 2, 3)
    return move
def main():
    """Play EPISODE_COUNT episodes of FrozenLake-v0 and print the results.

    Every move is chosen by get_action(). The environment's monitor is
    started with "rec" before the first episode and is always closed
    afterwards, even if an episode raises. Only the reward returned by the
    step that ends an episode is added to the total.

    Prints the episode count, the total reward and the average reward per
    episode.
    """
    env = gym.make("FrozenLake-v0")
    env.monitor.start("rec")

    reward_total = 0.0
    try:
        for _ in range(EPISODE_COUNT):
            ob = env.reset()
            if DEBUG:
                env.render()

            while True:
                ob, reward, done, info = env.step(get_action(ob))
                if DEBUG:
                    env.render()
                    print(ob, reward, done, info)
                if done:
                    # Only the reward of the terminating step is counted.
                    reward_total += reward
                    break
    finally:
        # Close the monitor even when an episode fails part-way (for
        # example the assertion in get_action or an error from env.step),
        # so the recording is not left open.
        env.monitor.close()

    # Avoid ZeroDivisionError if EPISODE_COUNT is ever configured as 0.
    average = reward_total / EPISODE_COUNT if EPISODE_COUNT else 0.0

    print("episodes: {}".format(EPISODE_COUNT))
    print("total reward: {}".format(reward_total))
    print("average reward: {:.2f}".format(average))
# Run the simulation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment