
@mks-m
Last active September 29, 2016 00:44
# Random hill-climbing on a 4-weight linear policy for OpenAI Gym's CartPole-v0.
import math

import gym
import numpy as np


def atg01(x):
    # Squash x into (0, 1) via arctan.
    return 0.5 + math.atan(x) / math.pi


env = gym.make('CartPole-v0')
best = 1
best_cs = np.random.rand(4) * 2 - 1  # best weight vector found so far
learn_rate = 0.1

for _ in range(200):
    env.reset()
    current = 0
    current_act = env.action_space.sample()  # first action of the episode is random
    # Perturb the best weights with uniform noise scaled by the learning rate.
    current_cs = best_cs + (np.random.rand(4) * 2 - 1) * learn_rate
    tries = 0
    done = False
    while not done and tries < 1000:
        state, inc, done, _ = env.step(current_act)
        # Linear policy: weight the observation, squash to (0, 1),
        # round to a discrete action (0 or 1).
        current_dot = atg01(np.dot(state, current_cs))
        current_act = int(round(current_dot))
        current += inc
        tries += 1
        # env.render()
    proximity = float(current) / float(best)
    if proximity >= 1:
        best = current
        proximity = 1.0
    proximity *= proximity * proximity  # cube: discount weak episodes harder
    # The closer this episode came to the best score, the smaller the next
    # perturbation and the more this episode's weights pull on best_cs.
    learn_rate = 1.0 - proximity
    best_cs = current_cs * proximity + best_cs * (1.0 - proximity)
    print(current, best, "%.2f" % learn_rate, best_cs)
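For reference, a minimal sketch of evaluating the weights the loop converges on. It assumes the same era gym API used above (env.reset() returns the observation; env.step() returns a 4-tuple) and reuses atg01, env, and best_cs from the script; the evaluate helper and its episodes parameter are illustrative names, not part of the original gist.

def evaluate(env, weights, episodes=10):
    # Greedy rollouts with fixed weights; returns mean episode reward.
    total = 0.0
    for _ in range(episodes):
        state = env.reset()
        done = False
        while not done:
            action = int(round(atg01(np.dot(state, weights))))
            state, reward, done, _ = env.step(action)
            total += reward
    return total / episodes

print("mean reward:", evaluate(env, best_cs))

Unlike the training loop, this rollout feeds the observation returned by env.reset() straight into the policy instead of starting with a random action, so it measures the learned weights alone.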