Created
May 2, 2018 12:44
-
-
Save prakhar21/9b196cbc3b7153909a7238341c30d307 to your computer and use it in GitHub Desktop.
Solving OpenAI CartPole-v0 with Random Search and Weight Initialization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
@uthor: Prakhar Mishra | |
''' | |
import gym | |
import numpy as np | |
env = gym.make('CartPole-v0') | |
class LinearCombinationPolicy(object):
    """Two-action policy driven by a linear combination of the observation.

    The action is picked from the sign of the product of a parameter
    vector and an observation vector: a negative product selects action 0,
    any other value (including exactly zero) selects action 1.

    The constructor stores three attributes:
        action   -- most recently selected action; -1 until one is selected.
        episodes -- number of episodes; kept for the caller, not used by
                    the class itself.
        steps    -- number of steps per episode; kept for the caller, not
                    used by the class itself.
    """

    def __init__(self, episodes=1000, steps=200):
        self.action = -1
        self.episodes = episodes
        self.steps = steps

    def combine(self, parameters, observation):
        """Return ``np.matmul(parameters, observation)``."""
        return np.matmul(parameters, observation)

    def action_selection(self, observation, parameters):
        """Select an action, store it in ``self.action`` and return it.

        The action is 0 when the combined value is negative, otherwise 1.
        """
        if self.combine(parameters, observation) < 0:
            self.action = 0
        else:
            self.action = 1
        return self.action

    def get_action(self, observation, parameters):
        """Run ``action_selection`` and return the stored action."""
        # Read the attribute back instead of trusting the return value so
        # that a subclass overriding ``action_selection`` without returning
        # anything keeps working.
        self.action_selection(observation, parameters)
        return self.action
# Random search over linear policies: every episode draws a fresh random
# weight vector, plays one episode with it, and the search stops at the
# first weight vector whose episode reaches a total reward of 200.
linearLearn = LinearCombinationPolicy()
try:
    # ``range`` and single-argument ``print(...)`` run unchanged on both
    # Python 2 and Python 3 (``xrange`` / the print statement do not).
    for _ in range(linearLearn.episodes):
        observation = env.reset()  # reset the environment state
        # Start every episode with 4 weights drawn uniformly from [-1, 1).
        parameters = np.random.rand(4) * 2 - 1
        TOTALREWARD = 0
        for step in range(linearLearn.steps):
            env.render()
            action = linearLearn.get_action(observation, parameters)
            observation, reward, done, info = env.step(action)
            TOTALREWARD += reward
            if done:
                break
        # NOTE(review): 200 presumably is the best achievable episode reward
        # (one reward unit per step over the default 200 steps) -- confirm
        # against the environment before changing ``steps``.
        if TOTALREWARD == 200:
            print('total reward is {}'.format(TOTALREWARD))
            print('parameters are {}'.format(parameters))
            break
finally:
    # Release the environment (and its render window) even if the search
    # is interrupted or an episode raises.
    env.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment