Created
June 10, 2016 01:05
-
-
Save JKCooper2/bef5cae764a28042c7d031a00f3355bf to your computer and use it in GitHub Desktop.
Hill Climbing Linear Model w/ Biased Update
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Alteration to [standard hill climbing model](https://gym.openai.com/algorithms/alg_WKinUO3TNabzwPeaD7A) | |
Uses a biased update that allows a worse-performing candidate to become the new standard, with reduced probability | |
For the CartPole environment this should result in a larger percentage of runs solving the problem |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import gym.scoreboard.scoring | |
from hill_climbing import HillClimbing | |
def main():
    """Train a HillClimbing agent on CartPole-v0 and print the local score.

    Runs a fixed number of monitored episodes, mutating the agent's model
    before each one, then prints the score computed from the monitor output
    directory. When ``upload`` is set, the results are uploaded using the API
    key read from the first line of ``../../../../api.txt``.
    """
    env = gym.make('CartPole-v0')
    agent = HillClimbing(env.action_space, env.observation_space, noise=0.25)

    upload = False  # Sets whether to upload to OpenAI
    outdir = '/tmp/' + agent.name + '-results'
    episode_count = 2000

    env.monitor.start(outdir, force=True, video_callable=False)
    try:
        for _ in range(episode_count):
            agent.mutate()

            ob = env.reset()
            reward = 0
            done = False

            # The agent is also shown the terminal transition (done=True) so
            # it can settle the episode; the action it returns then is unused.
            action = agent.act(ob, reward, done)
            while not done:
                ob, reward, done, _ = env.step(action)
                action = agent.act(ob, reward, done)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(gym.scoreboard.scoring.score_from_local(outdir))
    finally:
        # Close the monitor even if an episode raised.
        env.monitor.close()

    if upload:
        # Close the key file deterministically and drop the trailing newline
        # that readline() keeps, so the key is sent exactly as written.
        with open('../../../../api.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from linear_model import LinearModel | |
import numpy as np | |
class HillClimbing(object):
    """Hill-climbing agent with a biased (probabilistic) acceptance rule.

    Each episode is played with a mutated copy of the model's weights. When
    the episode ends, the mutated weights are either kept as the new best or
    discarded. Unlike strict hill climbing, an episode that scores somewhat
    worse than the current best can still be accepted, with a probability
    that shrinks as the shortfall grows.
    """

    def __init__(self, action_space, observation_space, noise=0.5, update_percent=0.2):
        """Build the agent and its linear model.

        Args:
            action_space: Stored on the agent; not otherwise used here.
            observation_space: Its ``low`` attribute's length sets the number
                of model weights.
            noise: Scale passed to the model's ``mutate`` on every mutation.
            update_percent: Base acceptance probability, i.e. roughly the
                largest relative drop below the current best that can still
                be accepted.
        """
        self.name = "Hill Climbing"
        self.alg_id = "1"  # originally followed by: alg_WKinUO3TNabzwPeaD7A

        self.action_space = action_space
        self.observation_space = observation_space

        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise

        self.best_score = 0
        self.episode_reward = 0

        self.update_percent = update_percent  # Maximum percent below current best the update will accept

        # False until one episode has finished; best_score is only a
        # placeholder before that and must not be compared against.
        self._has_best = False

    def mutate(self):
        """Perturb the model's weights using the configured noise scale."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Pick an action and, at episode end, accept or reject the mutation.

        Args:
            observation: Current observation, scored by the model.
            reward: Reward from the previous step; added to the episode total.
            done: True when the episode has ended.

        Returns:
            The action produced by the model for ``observation``.
        """
        action = self.model.score(observation)
        self.episode_reward += reward

        if done:
            self._finish_episode()

        return action

    def _finish_episode(self):
        """Keep or discard the current weights, then reset the episode total."""
        # Always draw exactly once per episode so the global random stream is
        # consumed the same way whether or not the draw is needed.
        draw = np.random.uniform()

        if not self._has_best:
            # Nothing to compare against yet: the first finished episode is
            # the baseline, whatever its sign or size.
            accept = True
        else:
            # Relative change versus the current best. abs() keeps the scale
            # positive so improvements on a negative best count as gains;
            # +0.01 stops divide by zero.
            gain = (self.episode_reward - self.best_score) / (abs(self.best_score) + 0.01)
            accept = draw < self.update_percent + gain

        if accept:
            self.best_score = self.episode_reward
            self.model.set_best_vals()
            self._has_best = True
        else:
            self.model.revert()

        self.episode_reward = 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import copy | |
class LinearModel(object):
    """Linear threshold policy: action 1 when the weighted sum is positive.

    Holds the working weights (``vals``) and a snapshot of the best weights
    kept so far (``best_vals``) that ``revert`` restores.
    """

    def __init__(self, n):
        """Draw ``n`` standard-normal weights.

        Args:
            n: Number of weights (one per observation component).
        """
        self.vals = np.random.randn(n)
        # Snapshot the initial weights so revert() is safe even before
        # set_best_vals() has ever been called.
        self.best_vals = self.vals.copy()

    def score(self, observation):
        """Return 0 if the dot product of observation and weights is <= 0, else 1."""
        return 0 if np.dot(observation, self.vals) <= 0 else 1

    def mutate(self, noise):
        """Add independent normal noise, scaled by ``noise``, to every weight."""
        self.vals += np.random.normal(size=self.vals.shape) * noise

    def set_best_vals(self):
        """Record a copy of the current weights as the best so far."""
        self.best_vals = self.vals.copy()

    def revert(self):
        """Replace the working weights with a copy of the best recorded ones."""
        # A copy keeps later in-place mutation from touching best_vals.
        self.vals = self.best_vals.copy()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment