Random Controllers for OpenAI Gym
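A random search over the weights of a linear controller with pairwise (quadratic) features, evaluated on gym's Pendulum-v0.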
import gym
import numpy as np
import random
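# NOTE: written against the classic gym API (env.reset() returns only the observation and
# env.step() returns a 4-tuple); later gym/gymnasium releases changed both signatures and
# renamed the task Pendulum-v1.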
env = gym.make('Pendulum-v0')
dim = env.observation_space.shape[0] + 1  # observation dimension plus one for a constant bias feature
params = int(dim + (dim*(dim-1))/2)       # one weight per feature plus one per pairwise product
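# For Pendulum-v0 the observation is (cos theta, sin theta, angular velocity), so dim = 4 and params = 4 + 6 = 10.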
# linear controller with pairwise features
def quad_control(w, ob, t):
    obs = list(ob) + [1]                  # raw observation plus a constant bias feature
    for i in range(dim):
        for j in range(i+1, dim):
            obs.append(obs[i]*obs[j])     # all pairwise products of the features
    s = w * np.array(obs)
    return np.clip([s.sum()], -2.0, 2.0)  # clip to Pendulum's torque limits
# score a weight vector over several episodes
def func(x):
    errs = []
    for i_episode in range(8):
        err = 0.0
        observation = env.reset()
        for t in range(200):
            action = quad_control(x, observation, t)
            observation, reward, done, info = env.step(action)
            err -= reward
            if done:  # early exit
                break
        errs.append(err)
    return sorted(errs)[6]  # second-worst episode cost, a pessimistic score
# try 1000 random configurations
best_score = float('inf')
for trial in range(1000):
    x = 10.0*(np.random.random(params)*2.0 - 1.0)  # weights drawn uniformly from [-10, 10]
    s = func(x)
    if s < best_score:
        best_score = s
        resx = x
        print('{}\t{:.2f}'.format(trial, s))
        if s < 500:
            print('early exit!')
            break
print(trial, resx)
# see if it worked
for i_episode in range(10):
    observation = env.reset()
    err = 0.0
    for t in range(200):
        env.render()
        action = quad_control(resx, observation, t)
        observation, reward, done, info = env.step(action)
        err -= reward
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break
    print('{:.2f}'.format(err))
print("we're done!")