Skip to content

Instantly share code, notes, and snippets.

@keymone keymone/cart_pole.py
Last active Sep 29, 2016

Embed
What would you like to do?
import gym
import numpy as np
import math
def atg01(x):
    """Squash a real number into the interval [0, 1] using arctangent.

    Computes ``0.5 + atan(x) / pi``: 0 maps to 0.5, large positive inputs
    approach 1 and large negative inputs approach 0.

    Args:
        x: A real number (anything ``math.atan`` accepts).

    Returns:
        A float between 0 and 1.
    """
    return 0.5 + math.atan(x) / math.pi
# Random-perturbation search for a linear CartPole policy.
#
# Each episode perturbs the best-known coefficient vector, plays one episode
# with the perturbed policy, and then blends the candidate into the best
# vector in proportion to how well it scored relative to the best score so far.
env = gym.make('CartPole-v0')

best = 1  # best episode return so far; starts at 1 so the first ratio is defined
best_cs = (np.random.rand(4) * 2 - 1)  # 4 coefficients drawn uniformly from [-1, 1)
learn_rate = 0.1  # scale of the random perturbation applied to best_cs

# `range` instead of the Python-2-only `xrange`, so this runs on Python 3 too.
for _ in range(200):
    env.reset()
    current = 0  # return accumulated during this episode
    # The observation from reset() is not used; the first action is random.
    current_act = env.action_space.sample()
    current_cs = best_cs + (np.random.rand(4) * 2 - 1) * learn_rate
    tries = 0
    done = False
    while not done and tries < 1000:
        outcome = env.step(current_act)
        state, inc = outcome[0], outcome[1]
        # Older gym returns (obs, reward, done, info); gym >= 0.26 returns
        # (obs, reward, terminated, truncated, info). Everything between the
        # reward and the trailing info entry is an end-of-episode flag.
        done = any(outcome[2:-1])
        # Linear score of the observation squashed into [0, 1], then rounded
        # to pick one of the two discrete actions.
        current_dot = atg01(np.dot(state, current_cs))
        current_act = int(round(current_dot))
        current += inc
        tries += 1
        # env.render()

    # How close this episode came to the best return (capped at 1).
    proximity = float(current) / float(best)
    if proximity >= 1:
        best = current
        proximity = 1.0
    # Cube the ratio so that weak episodes barely influence the best vector.
    proximity *= proximity * proximity
    # Search more widely after a poor episode, more narrowly after a good one.
    learn_rate = 1.0 - proximity
    best_cs = current_cs * proximity + best_cs * (1.0 - proximity)
    print(current, best, "%.2f" % learn_rate, best_cs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.