Skip to content

Instantly share code, notes, and snippets.

@JKCooper2
Created June 10, 2016 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JKCooper2/bef5cae764a28042c7d031a00f3355bf to your computer and use it in GitHub Desktop.
Save JKCooper2/bef5cae764a28042c7d031a00f3355bf to your computer and use it in GitHub Desktop.
Hill Climbing Linear Model w/ Biased Update
Alteration to [standard hill climbing model](https://gym.openai.com/algorithms/alg_WKinUO3TNabzwPeaD7A)
Uses a biased update rule that allows worse-performing weights to become the new standard with reduced probability
For the CartPole environment this should result in a larger percentage of test runs solving the problem
import gym
import gym.scoreboard.scoring
from hill_climbing import HillClimbing
def main():
env = gym.make('CartPole-v0')
agent = HillClimbing(env.action_space, env.observation_space, noise=0.25)
upload = False # Sets whether to upload to OpenAI
outdir = '/tmp/' + agent.name + '-results'
env.monitor.start(outdir, force=True, video_callable=False)
episode_count = 2000
for i in xrange(episode_count):
agent.mutate()
ob = env.reset()
reward = 0
done = False
action = agent.act(ob, reward, done)
while not done:
ob, reward, done, _ = env.step(action)
action = agent.act(ob, reward, done)
print gym.scoreboard.scoring.score_from_local(outdir)
env.monitor.close()
if upload:
gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../../../api.txt', 'r').readline())
if __name__ == '__main__':
main()
from linear_model import LinearModel
import numpy as np
class HillClimbing:
    """Hill-climbing agent over a linear policy, with a biased acceptance rule.

    When an episode ends, the freshly mutated weights are accepted as the new
    standard with probability
    update_percent + (episode_reward - best_score) / (best_score + 0.01);
    otherwise the model reverts to the previous best weights. This lets a
    somewhat worse episode still replace the incumbent with reduced probability.
    """

    def __init__(self, action_space, observation_space, noise=0.5, update_percent=0.2):
        self.name = "Hill Climbing"
        self.alg_id = "1"  # original scoreboard id: alg_WKinUO3TNabzwPeaD7A
        self.action_space = action_space
        self.observation_space = observation_space
        # One weight per observation feature.
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise
        self.best_score = 0
        self.episode_reward = 0
        # Maximum percent below the current best that an update will accept.
        self.update_percent = update_percent

    def mutate(self):
        """Randomly perturb the policy weights (call once per episode)."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Return an action for *observation*; on episode end, accept or revert weights."""
        chosen = self.model.score(observation)
        self.episode_reward += reward
        if not done:
            return chosen
        # Acceptance probability grows with the relative improvement over the
        # incumbent best; the +0.01 keeps the denominator away from zero.
        relative_gain = (self.episode_reward - self.best_score) / (self.best_score + 0.01)
        accept_prob = self.update_percent + relative_gain
        if np.random.uniform() < accept_prob:
            self.best_score = self.episode_reward
            self.model.set_best_vals()
        else:
            self.model.revert()
        self.episode_reward = 0
        return chosen
import numpy as np
import copy
class LinearModel:
    """Binary linear policy: action 1 iff the weighted sum of the observation is positive."""

    def __init__(self, n):
        # n: number of observation features (one weight per feature).
        self.vals = np.random.randn(n)
        # Seed the incumbent best with the initial weights so that a revert()
        # issued before any set_best_vals() is safe. Previously best_vals
        # started as None, and an early revert() clobbered vals with None,
        # crashing the next score() call.
        self.best_vals = copy.copy(self.vals)

    def score(self, observation):
        """Return action 1 if observation . vals > 0, else 0.

        Assumes observation has the same length as the weight vector
        (it is built from the observation space in the agent).
        """
        if np.dot(observation, self.vals) <= 0:
            return 0
        return 1

    def mutate(self, noise):
        """Add independent Gaussian noise (scaled by *noise*) to every weight."""
        self.vals += np.random.normal(size=len(self.vals)) * noise

    def set_best_vals(self):
        """Record the current weights as the best seen so far."""
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        """Restore the weights to the best recorded values."""
        self.vals = copy.copy(self.best_vals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment