Last active
February 15, 2018 21:19
-
-
Save leonidk/cd763e88cf9301eb6e32eb5980ec5bdf to your computer and use it in GitHub Desktop.
Random Controllers for OpenAI Gym
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import numpy as np | |
import random | |
# Environment whose controller we search for.
env = gym.make('Pendulum-v0')

# Base feature count: one per observation component plus a constant bias term.
dim = env.observation_space.shape[0] + 1

# Weight-vector length: `dim` linear terms plus one weight for each unordered
# pair of distinct base features, i.e. dim + dim*(dim-1)/2 == dim*(dim+1)/2.
params = dim * (dim + 1) // 2
# linear controller with pairwise features | |
def quad_control(w, ob, t):
    """Compute a clipped action from a linear model over pairwise features.

    The observation is extended with a constant 1 (bias), then with the
    product of every unordered pair of distinct entries of that extended
    vector, in the order (0,1), (0,2), ..., (1,2), ... .  The action is the
    weighted sum of all those features, clipped to [-2.0, 2.0].

    Args:
        w: weights, one per feature; for an observation of length k there
            are n + n*(n-1)/2 features with n = k + 1.
        ob: iterable of observation components.
        t: time step; unused, kept so the call signature stays stable.

    Returns:
        A length-1 numpy array holding the clipped action.
    """
    base = list(ob) + [1]
    # Size the expansion from the observation actually received instead of a
    # module-level constant, so a differently sized observation cannot
    # silently produce the wrong feature set.
    n = len(base)
    pairwise = [base[i] * base[j] for i in range(n) for j in range(i + 1, n)]
    features = np.array(base + pairwise)
    weighted = np.asarray(w) * features
    return np.clip([weighted.sum()], -2.0, 2.0)
# run an episode | |
def func(x):
    """Score a weight vector by rolling it out on the environment.

    Runs 8 episodes of at most 200 steps each with `quad_control(x, ...)`
    choosing the action.  An episode's cost is the negated sum of its
    rewards.  Returns the element at index 6 of the ascending-sorted costs,
    i.e. the second-highest cost of the 8 episodes.
    """
    costs = []
    for _episode in range(8):
        obs = env.reset()
        cost = 0.0
        for step in range(200):
            obs, reward, done, _info = env.step(quad_control(x, obs, step))
            cost -= reward
            if done:  # environment ended the episode early
                break
        costs.append(cost)
    costs.sort()
    return costs[6]
# try 1000 random configurations | |
# Random search: draw weight vectors uniformly from [-10, 10), keep the one
# with the lowest cost, and stop as soon as any draw scores below 500.
best_score = float('inf')
for trial in range(1000):
    candidate = 10.0 * (2.0 * np.random.random(params) - 1.0)
    score = func(candidate)
    if score < best_score:
        best_score, resx = score, candidate
        print('{}\t{:.2f}'.format(trial, score))
    if score < 500:
        print('early exit!')
        break
# Report the last trial index and the best weights found.
print(trial, resx)
# see if it worked | |
# Replay the best weights (`resx`) for 10 rendered episodes of at most 200
# steps, printing each episode's cost (negated sum of rewards).
try:
    for i_episode in range(10):
        observation = env.reset()
        err = 0.0
        for t in range(200):
            env.render()
            action = quad_control(resx, observation, t)
            observation, reward, done, info = env.step(action)
            err -= reward
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
        print('{:.2f}'.format(err))
finally:
    # Release the render window and other environment resources even if a
    # rollout raises; otherwise the viewer is left open until interpreter exit.
    env.close()
print("we're done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment