@JKCooper2
Last active June 15, 2016 12:35
Linear Model Hill Climbing for CartPole
For part 1 of https://openai.com/requests-for-research/#cartpole

The agent uses a linear policy: it pushes right when the dot product of its weights and the observation is positive, and left otherwise. Before each episode it perturbs the weights with Gaussian noise, and it keeps the perturbation only if the episode's total reward beats the best seen so far. Quite often it doesn't solve the environment, because the hill climbing gets stuck at a local optimum.
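To see that keep-or-revert rule in isolation, here is a numpy-only sketch of the same hill climbing on a toy objective. Everything in it (the objective f, the step count, the noise scale) is made up for illustration and is not part of the gist:

# Illustrative sketch of keep-or-revert hill climbing; f and all constants
# here are made up, not taken from the gist.
import numpy as np

def f(w):
    return -np.sum((w - 1.0) ** 2)  # toy objective, maximised at w = (1, 1)

best_vals = np.random.randn(2)
best_score = f(best_vals)

for _ in range(200):
    vals = best_vals + np.random.normal(size=2) * 0.5  # mutate around the best
    score = f(vals)
    if score > best_score:  # keep the mutation only if it improves the score
        best_score, best_vals = score, vals

print(best_vals, best_score)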
# Main script: requires the 2016-era gym API (env.monitor, gym.scoreboard, gym.upload) and Python 2
import gym
import gym.scoreboard.scoring

from hill_climbing import HillClimbing


def main():
    env = gym.make('CartPole-v0')
    agent = HillClimbing(env.action_space, env.observation_space)

    upload = True  # Whether to upload the results to the OpenAI Gym scoreboard
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    best_repeat = 200  # Episodes to replay the best weights once a perfect score is reached

    for i in xrange(episode_count):
        # Mutate until the agent reaches the maximum score of 200,
        # then replay the best weights for the remaining episodes
        if agent.best_score < 200:
            agent.mutate()
        else:
            best_repeat -= 1
            if best_repeat <= 0:
                print "Complete"
                break

        ob = env.reset()
        reward = 0
        done = False
        action = agent.act(ob, reward, done)

        while not done:
            ob, reward, done, _ = env.step(action)
            action = agent.act(ob, reward, done)

        print gym.scoreboard.scoring.score_from_local(outdir)

    env.monitor.close()

    if upload:
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../../../api.txt', 'r').readline())


if __name__ == '__main__':
    main()
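The script above depends on APIs that were removed from gym years ago (env.monitor, gym.scoreboard, gym.upload). For reference, here is a minimal sketch of the same training loop against the current Gymnasium API; it assumes the gymnasium package and CartPole-v1's 500-step cap, and run_episode is an illustrative helper, not part of the gist:

# Illustrative sketch only: the same loop on the modern Gymnasium API.
# Assumes `pip install gymnasium`; run_episode is a made-up helper.
import gymnasium as gym

from hill_climbing import HillClimbing


def run_episode(env, agent):
    ob, _ = env.reset()
    reward = 0
    done = False
    action = agent.act(ob, reward, done)
    while not done:
        ob, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated  # treat the 500-step cap as end of episode
        action = agent.act(ob, reward, done)


def main():
    env = gym.make('CartPole-v1')
    agent = HillClimbing(env.action_space, env.observation_space)
    for _ in range(2000):
        if agent.best_score < 500:  # CartPole-v1 episodes are capped at 500 steps
            agent.mutate()
        run_episode(env, agent)
        print(agent.best_score)


if __name__ == '__main__':
    main()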
# hill_climbing.py
from linear_model import LinearModel


class HillClimbing:
    def __init__(self, action_space, observation_space, noise=0.5):
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"
        self.action_space = action_space
        self.observation_space = observation_space
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise  # Standard deviation of the Gaussian mutation applied to the weights
        self.best_score = 0
        self.episode_reward = 0

    def mutate(self):
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        # Linear policy: push left (0) or right (1) depending on the sign of the score
        if self.model.score(observation) <= 0:
            action = 0
        else:
            action = 1

        # `reward` is the reward for the previous action, so accumulate it here
        self.episode_reward += reward

        if done:
            if self.episode_reward > self.best_score:
                self.best_score = self.episode_reward
                self.model.set_best_vals()  # Keep the mutated weights as the best found
            else:
                self.model.revert()  # Revert the model to the best weights found so far
            self.episode_reward = 0

        return action
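Note that act() both selects the action and, on the final step of an episode, applies the keep-or-revert update, so the agent can be exercised without gym at all. In this sketch the FakeSpace stand-in and the fixed ten-step "episodes" are made up for illustration:

# Illustrative driver exercising HillClimbing without gym; FakeSpace and the
# fixed-length "episodes" are made up.
import numpy as np

from hill_climbing import HillClimbing


class FakeSpace:
    def __init__(self, n):
        self.low = np.zeros(n)  # HillClimbing only reads observation_space.low


agent = HillClimbing(action_space=FakeSpace(2), observation_space=FakeSpace(4))

for episode in range(5):
    agent.mutate()
    ob, reward, done = np.random.randn(4), 0, False
    action = agent.act(ob, reward, done)
    for step in range(10):
        ob, reward = np.random.randn(4), 1
        done = step == 9
        action = agent.act(ob, reward, done)  # on done: keep-or-revert update
    print(agent.best_score)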
# linear_model.py
import numpy as np
import copy


class LinearModel:
    def __init__(self, n):
        self.vals = np.random.randn(n)
        self.best_vals = copy.copy(self.vals)  # Treat the initial weights as the best found so far

    def score(self, observation):
        # Weighted sum of the observation (a dot product with the weights)
        return sum(observation[i] * self.vals[i] for i in range(len(observation)))

    def mutate(self, noise):
        # Add zero-mean Gaussian noise with standard deviation `noise` to each weight
        for i in range(len(self.vals)):
            self.vals[i] += np.random.normal() * noise

    def set_best_vals(self):
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        self.vals = copy.copy(self.best_vals)
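A quick usage example of LinearModel on its own; the observation values below are made up. score() gives the same value as np.dot(model.vals, ob), and mutate()/revert() round-trip back to the weights saved by set_best_vals():

# Illustrative usage of LinearModel; the observation values are made up.
import numpy as np

from linear_model import LinearModel

model = LinearModel(4)  # CartPole observations have four components
ob = np.array([0.02, -0.3, 0.01, 0.4])
print(model.score(ob))  # same value as np.dot(model.vals, ob)

model.set_best_vals()
saved = model.vals.copy()
model.mutate(0.5)   # perturb the weights in place
model.revert()      # restore the weights saved by set_best_vals
assert np.allclose(model.vals, saved)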