Skip to content

Instantly share code, notes, and snippets.

@akarazeev
Last active February 19, 2017 12:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akarazeev/65c72b3a6006a26d02727f1a046bc250 to your computer and use it in GitHub Desktop.
Save akarazeev/65c72b3a6006a26d02727f1a046bc250 to your computer and use it in GitHub Desktop.
import numpy as np
import gym
from gym import wrappers
# Per-episode step cap. run_episode() iterates at most this many times, and
# the search loop treats an episode total equal to this value as a perfect run.
nsteps = 200

# Single CartPole environment instance shared by every episode in this script.
env = gym.make('CartPole-v0')
def run_episode(env, parameters, max_steps=None, *, render=True):
    """Run one episode with a linear threshold policy and return its reward.

    At every step the action is 0 when the dot product of ``parameters`` and
    the current observation is negative, and 1 otherwise.

    Args:
        env: Environment object providing ``reset()``, ``step(action)``,
            ``render()`` and ``close()``. ``step`` must return the 4-tuple
            ``(observation, reward, done, info)``.
        parameters: Weight vector multiplied with each observation.
        max_steps: Upper bound on the number of steps taken. ``None`` (the
            default) falls back to the module-level ``nsteps``.
        render: When true (the default), call ``env.render()`` before every
            step. Pass ``False`` to run without drawing.

    Returns:
        The sum of the rewards collected until the environment reports
        ``done`` or the step cap is reached.
    """
    if max_steps is None:
        max_steps = nsteps

    total_reward = 0
    try:
        observation = env.reset()
        for _ in range(max_steps):
            if render:
                env.render()
            action = 0 if np.matmul(parameters, observation) < 0 else 1
            observation, reward, done, _info = env.step(action)
            total_reward += reward
            if done:
                break
    finally:
        # Close even when reset/step/render raises, so an interrupted episode
        # does not leave the environment's resources open.
        env.close()
    return total_reward
# Random search: sample weight vectors uniformly from [-1, 1), score each with
# one episode, and remember the best one. Stop early on a perfect episode.
bestreward = 0
bestparams = None
for _ in range(10000):
    parameters = np.random.rand(4) * 2 - 1
    tmpreward = run_episode(env, parameters)
    print(tmpreward)

    # Nothing to record unless this episode beat the best seen so far.
    if tmpreward <= bestreward:
        continue

    bestreward, bestparams = tmpreward, parameters
    # An episode total equal to the step cap cannot be improved on.
    if bestreward == nsteps:
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment