Created
January 16, 2017 06:16
-
-
Save amoudgl/108123c880488965b8c1744cacd60dd6 to your computer and use it in GitHub Desktop.
Hill Climbing Algorithm for Cartpole Environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hill climbing algorithm
# Generate a random configuration of the parameters, add a small amount of noise to the parameters, and evaluate the new parameter configuration.
# If the new configuration is better than the old one, discard the old one and accept the new one.
# The parameters are optimized for a weighted sum of the observation.
# Returns the net episode reward.
def get_episode_reward(env, observation, params, episode=None, max_steps=1000):
    """Run one episode with a linear threshold policy and return its total reward.

    At every step the action is 0 when the inner product of the current
    observation and ``params`` is negative, and 1 otherwise.

    Args:
        env: Environment whose ``step(action)`` returns the tuple
            ``(observation, reward, done, info)``.
        observation: Initial observation of the episode (as returned by the
            caller's ``env.reset()``).
        params: Weight vector applied to the observation.
        episode: Episode number used only in the log message. When omitted,
            the module-level ``episode`` variable is used if it exists.
        max_steps: Upper bound on the number of steps taken.

    Returns:
        The sum of the rewards of every step taken, including the step on
        which the environment reported ``done``.
    """
    net_reward = 0
    for t in range(max_steps):
        action = 0 if np.inner(observation, params) < 0 else 1
        observation, reward, done, info = env.step(action)
        # Accumulate before checking ``done`` so the terminal step's reward
        # is part of the returned total (and matches the logged value).
        net_reward += reward
        if done:
            if episode is None:
                # Backward compatibility: the driver loop keeps its counter
                # in a module-level ``episode`` variable instead of passing
                # it in; -1 marks "unknown" when no such global exists.
                episode = globals().get("episode", -1)
            print("Episode %d finished after %d timesteps, reward = %d"%(episode, t + 1, net_reward))
            break
    return net_reward
# imports and initializations
import gym | |
from gym import wrappers | |
import numpy as np | |
# Create the CartPole environment and wrap it in a Monitor that writes its
# output under /tmp/cartpole-hill-climbing.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-hill-climbing', force=True)

max_reward = 0  # best episode reward accepted so far
gamma = 0.3  # scale applied to the random noise added to the parameters
params = 2 * np.random.rand(4) - 1  # initial weights, uniform in [-1, 1)

try:
    # NOTE: ``episode`` is a module-level name; get_episode_reward reads it
    # for its log message.
    for episode in range(10000):
        # get new random parameter tuple: current parameters plus noise
        # drawn uniformly from [-gamma, gamma)
        random_noise = 2 * np.random.rand(4) - 1
        noisy_params = params + gamma * random_noise

        # reset environment and check if noisy parameters perform at least
        # as well as the best result so far; if so, accept them
        observation = env.reset()
        net_reward = get_episode_reward(env, observation, noisy_params)
        if net_reward >= max_reward:
            max_reward = net_reward
            params = noisy_params
finally:
    # Always close the environment (also on error or Ctrl-C) so the Monitor
    # wrapper gets the chance to finalize what it has recorded.
    env.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment