@JKCooper2
Last active June 15, 2016 12:35
Linear Model Hill Climbing for CartPole
For part 1 of https://openai.com/requests-for-research/#cartpole

The agent uses a linear policy: it pushes right when the dot product of its weights and the observation is positive, and left otherwise. Before each episode it perturbs the weights with Gaussian noise, and it keeps the perturbation only if the episode's total reward beats the best seen so far. Quite often it doesn't solve the environment, because the hill climbing gets stuck at a local optimum.
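To see that keep-or-revert rule in isolation, here is a numpy-only sketch of the same hill climbing on a toy objective. Everything in it (the objective f, the step count, the noise scale) is made up for illustration and is not part of the gist:

# Illustrative sketch of keep-or-revert hill climbing; f and all constants
# here are made up, not taken from the gist.
import numpy as np

def f(w):
    return -np.sum((w - 1.0) ** 2)  # toy objective, maximised at w = (1, 1)

best_vals = np.random.randn(2)
best_score = f(best_vals)

for _ in range(200):
    vals = best_vals + np.random.normal(size=2) * 0.5  # mutate around the best
    score = f(vals)
    if score > best_score:  # keep the mutation only if it improves the score
        best_score, best_vals = score, vals

print(best_vals, best_score)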
# Main script: requires the 2016-era gym API (env.monitor, gym.scoreboard, gym.upload) and Python 2
import gym
import gym.scoreboard.scoring

from hill_climbing import HillClimbing


def main():
    env = gym.make('CartPole-v0')
    agent = HillClimbing(env.action_space, env.observation_space)

    upload = True  # Whether to upload the results to the OpenAI Gym scoreboard
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    best_repeat = 200  # Episodes to replay the best weights once a perfect score is reached

    for i in xrange(episode_count):
        # Mutate until the agent reaches the maximum score of 200,
        # then replay the best weights for the remaining episodes
        if agent.best_score < 200:
            agent.mutate()
        else:
            best_repeat -= 1
            if best_repeat <= 0:
                print "Complete"
                break

        ob = env.reset()
        reward = 0
        done = False
        action = agent.act(ob, reward, done)

        while not done:
            ob, reward, done, _ = env.step(action)
            action = agent.act(ob, reward, done)

        print gym.scoreboard.scoring.score_from_local(outdir)

    env.monitor.close()

    if upload:
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../../../api.txt', 'r').readline())


if __name__ == '__main__':
    main()
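The script above depends on APIs that were removed from gym years ago (env.monitor, gym.scoreboard, gym.upload). For reference, here is a minimal sketch of the same training loop against the current Gymnasium API; it assumes the gymnasium package and CartPole-v1's 500-step cap, and run_episode is an illustrative helper, not part of the gist:

# Illustrative sketch only: the same loop on the modern Gymnasium API.
# Assumes `pip install gymnasium`; run_episode is a made-up helper.
import gymnasium as gym

from hill_climbing import HillClimbing


def run_episode(env, agent):
    ob, _ = env.reset()
    reward = 0
    done = False
    action = agent.act(ob, reward, done)
    while not done:
        ob, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated  # treat the 500-step cap as end of episode
        action = agent.act(ob, reward, done)


def main():
    env = gym.make('CartPole-v1')
    agent = HillClimbing(env.action_space, env.observation_space)
    for _ in range(2000):
        if agent.best_score < 500:  # CartPole-v1 episodes are capped at 500 steps
            agent.mutate()
        run_episode(env, agent)
        print(agent.best_score)


if __name__ == '__main__':
    main()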
# hill_climbing.py
from linear_model import LinearModel


class HillClimbing:
    def __init__(self, action_space, observation_space, noise=0.5):
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"
        self.action_space = action_space
        self.observation_space = observation_space
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise  # Standard deviation of the Gaussian mutation applied to the weights
        self.best_score = 0
        self.episode_reward = 0

    def mutate(self):
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        # Linear policy: push left (0) or right (1) depending on the sign of the score
        if self.model.score(observation) <= 0:
            action = 0
        else:
            action = 1

        # `reward` is the reward for the previous action, so accumulate it here
        self.episode_reward += reward

        if done:
            if self.episode_reward > self.best_score:
                self.best_score = self.episode_reward
                self.model.set_best_vals()  # Keep the mutated weights as the best found
            else:
                self.model.revert()  # Revert the model to the best weights found so far
            self.episode_reward = 0

        return action
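Note that act() both selects the action and, on the final step of an episode, applies the keep-or-revert update, so the agent can be exercised without gym at all. In this sketch the FakeSpace stand-in and the fixed ten-step "episodes" are made up for illustration:

# Illustrative driver exercising HillClimbing without gym; FakeSpace and the
# fixed-length "episodes" are made up.
import numpy as np

from hill_climbing import HillClimbing


class FakeSpace:
    def __init__(self, n):
        self.low = np.zeros(n)  # HillClimbing only reads observation_space.low


agent = HillClimbing(action_space=FakeSpace(2), observation_space=FakeSpace(4))

for episode in range(5):
    agent.mutate()
    ob, reward, done = np.random.randn(4), 0, False
    action = agent.act(ob, reward, done)
    for step in range(10):
        ob, reward = np.random.randn(4), 1
        done = step == 9
        action = agent.act(ob, reward, done)  # on done: keep-or-revert update
    print(agent.best_score)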
# linear_model.py
import numpy as np
import copy


class LinearModel:
    def __init__(self, n):
        self.vals = np.random.randn(n)
        self.best_vals = copy.copy(self.vals)  # Treat the initial weights as the best found so far

    def score(self, observation):
        # Weighted sum of the observation (a dot product with the weights)
        return sum(observation[i] * self.vals[i] for i in range(len(observation)))

    def mutate(self, noise):
        # Add zero-mean Gaussian noise with standard deviation `noise` to each weight
        for i in range(len(self.vals)):
            self.vals[i] += np.random.normal() * noise

    def set_best_vals(self):
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        self.vals = copy.copy(self.best_vals)
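A quick usage example of LinearModel on its own; the observation values below are made up. score() gives the same value as np.dot(model.vals, ob), and mutate()/revert() round-trip back to the weights saved by set_best_vals():

# Illustrative usage of LinearModel; the observation values are made up.
import numpy as np

from linear_model import LinearModel

model = LinearModel(4)  # CartPole observations have four components
ob = np.array([0.02, -0.3, 0.01, 0.4])
print(model.score(ob))  # same value as np.dot(model.vals, ob)

model.set_best_vals()
saved = model.vals.copy()
model.mutate(0.5)   # perturb the weights in place
model.revert()      # restore the weights saved by set_best_vals
assert np.allclose(model.vals, saved)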