import gym
import numpy as np
import time


def run_test_env(env_name, perform_action=True, exit_on_done=True):
    env = gym.make(env_name)
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            env.render()
            # with perform_action disabled, just render the start state
            if not perform_action:
                continue
            action = env.action_space.sample()
            print(action)
            observation, reward, done, info = env.step(action)
            '''env.step() returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after the previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
            '''
            # print("Observation %s reward %s done %s info %s" % (observation, reward, done, info))
            print(observation, reward, done, info)
            if done:
                print('Episode {} finished after {} steps'.format(i_episode, t + 1))
                if exit_on_done:
                    break
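

# A small helper (an addition, not part of the original gist) to show what
# env.step() consumes and returns: for CartPole-v0 the action space is
# Discrete(2) (0 = push left, 1 = push right) and each observation is a
# 4-vector: cart position, cart velocity, pole angle, pole tip velocity.
def inspect_spaces(env_name):
    env = gym.make(env_name)
    print(env.action_space)       # e.g. Discrete(2)
    print(env.observation_space)  # e.g. Box(4,)

# inspect_spaces('CartPole-v0')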


# Hill climbing: start from random weights, add small random noise, and keep
# the new weights whenever they earn a better reward than the best seen so far.
def run_episode(env, parameters):
    observation = env.reset()
    total_reward = 0
    for _ in range(200):
        env.render()
        # linear policy: push left (0) if the weighted sum of the
        # observation is negative, otherwise push right (1)
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        # wait a little to make it easier to view the cart's behavior
        # time.sleep(0.2)
        total_reward += reward
        if done:
            print('Done')
            break
    return total_reward
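

# A hedged sketch (an addition, not in the original gist): a single episode's
# reward is noisy because the start state is random, so averaging run_episode
# over a few episodes gives a steadier score when comparing parameter vectors.
def average_reward(env, parameters, episodes=10):
    # mean total reward of the same linear policy over several episodes
    return sum(run_episode(env, parameters) for _ in range(episodes)) / episodes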


def random_parameters(param_count):
    # uniform random weights in [-1, 1)
    return np.random.rand(param_count) * 2 - 1


# Hill climbing: perturb the current best parameters with scaled noise and
# adopt the perturbed parameters whenever they score a higher reward.
def train(submit):
    env = gym.make('CartPole-v0')
    noise_scaling = 0.1
    parameters = random_parameters(4)
    new_parameters = None
    best_reward = 0
    for i in range(2000):
        new_parameters = parameters + random_parameters(4) * noise_scaling
        reward = run_episode(env, new_parameters)
        print("Reward %d best %d : %f %f %f %f" % (reward, best_reward, *parameters))
        if reward > best_reward:
            print("New best reward %d" % reward)
            best_reward = reward
            parameters = new_parameters
            if reward == 200:
                # CartPole-v0 caps episodes at 200 steps, so this run is a success
                print(i)
                break
    return parameters
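

# To isolate the hill-climbing idea from the environment, the same loop can
# maximize any black-box score function. A toy sketch (assumed, not from the
# original gist) using a quadratic whose maximum sits at w = [1, 2, 3, 4]:
def hill_climb_demo(steps=2000, noise_scaling=0.1):
    target = np.array([1.0, 2.0, 3.0, 4.0])

    def score(w):
        # black-box objective, maximized when w == target
        return -np.sum((w - target) ** 2)

    w = random_parameters(4)
    best = score(w)
    for _ in range(steps):
        candidate = w + random_parameters(4) * noise_scaling
        if score(candidate) > best:
            best = score(candidate)
            w = candidate
    return w  # moves toward [1, 2, 3, 4]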


# r = train(submit=False)

# view different start positions
# run_test_env('CartPole-v0', perform_action=False, exit_on_done=False)

# view the cart's behavior under random actions, without exiting the
# environment even after the pole has passed its recovery point
# run_test_env('CartPole-v0', perform_action=True, exit_on_done=False)

# run_test_env('CartPole-v0')