Solving OpenAI CartPole-v0 with Random Search and Weight Initialization

Each episode samples a fresh random weight vector in [-1, 1) for a linear policy and stops once a weight vector keeps the pole balanced for the full 200 steps.
'''
@author: Prakhar Mishra
'''
import gym
import numpy as np

env = gym.make('CartPole-v0')


class LinearCombinationPolicy():
    def __init__(self, episodes=1000, steps=200):
        self.action = -1
        self.episodes = episodes
        self.steps = steps

    def combine(self, parameters, observation):
        # Linear combination of the weight vector and the 4-dimensional observation.
        return np.matmul(parameters, observation)

    def action_selection(self, observation, parameters):
        # Push the cart left (0) or right (1) depending on the sign of the combination.
        if self.combine(parameters, observation) < 0:
            self.action = 0
        else:
            self.action = 1

    def get_action(self, observation, parameters):
        self.action_selection(observation, parameters)
        return self.action


linearLearn = LinearCombinationPolicy()

for _ in range(linearLearn.episodes):
    observation = env.reset()  # reset the environment state
    parameters = np.random.rand(4) * 2 - 1  # re-sample random weights in [-1, 1) every episode
    TOTALREWARD = 0
    for step in range(linearLearn.steps):
        env.render()
        action = linearLearn.get_action(observation, parameters)
        observation_, reward, done, info = env.step(action)
        TOTALREWARD += reward
        observation = observation_
        if done:
            break
    # CartPole-v0 counts as solved when an episode reaches the maximum reward of 200.
    if TOTALREWARD == 200:
        print('total reward is {}'.format(TOTALREWARD))
        print('parameters are {}'.format(parameters))
        break
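
Once a successful weight vector is printed, a quick sanity check is to replay it for a few episodes without resampling. The sketch below is not part of the original gist: best_parameters is a hypothetical stand-in for the weights printed above, and the same classic Gym reset/step API used in the gist is assumed.

# Minimal evaluation sketch (assumes the classic Gym API used above).
import gym
import numpy as np

def run_episode(env, parameters, steps=200):
    observation = env.reset()
    total_reward = 0
    for _ in range(steps):
        # Same linear decision rule as the gist: act on the sign of the dot product.
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward

env = gym.make('CartPole-v0')
best_parameters = np.random.rand(4) * 2 - 1  # placeholder: substitute the printed weights
scores = [run_episode(env, best_parameters) for _ in range(10)]
print('average reward over 10 episodes: {}'.format(np.mean(scores)))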