Skip to content

Instantly share code, notes, and snippets.

@JKCooper2
Created June 10, 2016 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JKCooper2/bef5cae764a28042c7d031a00f3355bf to your computer and use it in GitHub Desktop.
Save JKCooper2/bef5cae764a28042c7d031a00f3355bf to your computer and use it in GitHub Desktop.
Hill Climbing Linear Model w/ Biased Update
Alteration to [standard hill climbing model](https://gym.openai.com/algorithms/alg_WKinUO3TNabzwPeaD7A)
Uses a biased update rule that allows worse-performing weights to become the new standard with reduced probability
For the CartPole environment this should result in a larger percentage of test runs solving the problem
import gym
import gym.scoreboard.scoring
from hill_climbing import HillClimbing
def main():
env = gym.make('CartPole-v0')
agent = HillClimbing(env.action_space, env.observation_space, noise=0.25)
upload = False # Sets whether to upload to OpenAI
outdir = '/tmp/' + agent.name + '-results'
env.monitor.start(outdir, force=True, video_callable=False)
episode_count = 2000
for i in xrange(episode_count):
agent.mutate()
ob = env.reset()
reward = 0
done = False
action = agent.act(ob, reward, done)
while not done:
ob, reward, done, _ = env.step(action)
action = agent.act(ob, reward, done)
print gym.scoreboard.scoring.score_from_local(outdir)
env.monitor.close()
if upload:
gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../../../api.txt', 'r').readline())
if __name__ == '__main__':
main()
from linear_model import LinearModel
import numpy as np
class HillClimbing:
    """Hill-climbing agent over a linear policy, with a biased acceptance rule.

    When an episode ends, the freshly mutated weights are accepted as the new
    standard with probability
    update_percent + (episode_reward - best_score) / (best_score + 0.01);
    otherwise the model reverts to the previous best weights. This lets a
    somewhat worse episode still replace the incumbent with reduced probability.
    """

    def __init__(self, action_space, observation_space, noise=0.5, update_percent=0.2):
        self.name = "Hill Climbing"
        self.alg_id = "1"  # original scoreboard id: alg_WKinUO3TNabzwPeaD7A
        self.action_space = action_space
        self.observation_space = observation_space
        # One weight per observation feature.
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise
        self.best_score = 0
        self.episode_reward = 0
        # Maximum percent below the current best that an update will accept.
        self.update_percent = update_percent

    def mutate(self):
        """Randomly perturb the policy weights (call once per episode)."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Return an action for *observation*; on episode end, accept or revert weights."""
        chosen = self.model.score(observation)
        self.episode_reward += reward
        if not done:
            return chosen
        # Acceptance probability grows with the relative improvement over the
        # incumbent best; the +0.01 keeps the denominator away from zero.
        relative_gain = (self.episode_reward - self.best_score) / (self.best_score + 0.01)
        accept_prob = self.update_percent + relative_gain
        if np.random.uniform() < accept_prob:
            self.best_score = self.episode_reward
            self.model.set_best_vals()
        else:
            self.model.revert()
        self.episode_reward = 0
        return chosen
import numpy as np
import copy
class LinearModel:
    """Binary linear policy: action 1 iff the weighted sum of the observation is positive."""

    def __init__(self, n):
        # n: number of observation features (one weight per feature).
        self.vals = np.random.randn(n)
        # Seed the incumbent best with the initial weights so that a revert()
        # issued before any set_best_vals() is safe. Previously best_vals
        # started as None, and an early revert() clobbered vals with None,
        # crashing the next score() call.
        self.best_vals = copy.copy(self.vals)

    def score(self, observation):
        """Return action 1 if observation . vals > 0, else 0.

        Assumes observation has the same length as the weight vector
        (it is built from the observation space in the agent).
        """
        if np.dot(observation, self.vals) <= 0:
            return 0
        return 1

    def mutate(self, noise):
        """Add independent Gaussian noise (scaled by *noise*) to every weight."""
        self.vals += np.random.normal(size=len(self.vals)) * noise

    def set_best_vals(self):
        """Record the current weights as the best seen so far."""
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        """Restore the weights to the best recorded values."""
        self.vals = copy.copy(self.best_vals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment