Skip to content

Instantly share code, notes, and snippets.

Created January 16, 2017 06:17
Show Gist options
  • Save amoudgl/6fcb2db7314e6c4f6b7a028dfe1f27db to your computer and use it in GitHub Desktop.
Save amoudgl/6fcb2db7314e6c4f6b7a028dfe1f27db to your computer and use it in GitHub Desktop.
Random Guessing Algorithm for Cartpole Environment
# random guessing algorithm
# generate 1000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward.
# the policy is a weighted sum of the observation: action 0 if np.inner(observation, params) < 0, otherwise action 1
import gym
from gym import wrappers
import numpy as np
NUM_EPISODES = 1000
MONITOR_DIR = '/tmp/cartpole-random-guessing'


def choose_action(observation, params):
    """Return the action selected by a linear policy.

    The decision value is the inner product of ``observation`` and
    ``params``. A negative value selects action 0; zero or a positive
    value selects action 1.
    """
    return 0 if np.inner(observation, params) < 0 else 1


def run_episode(env, params):
    """Play one episode on ``env`` using the linear policy ``params``.

    Steps the environment until it reports ``done``.

    Returns:
        A ``(net_reward, timesteps)`` tuple: the sum of the rewards
        returned by every step, and the number of steps taken.
    """
    observation = env.reset()
    net_reward = 0
    timesteps = 0
    done = False
    while not done:
        action = choose_action(observation, params)
        observation, reward, done, _ = env.step(action)
        # Accumulate before testing `done` so the final step's reward is
        # included in the total that gets reported and compared.
        net_reward += reward
        timesteps += 1
    return net_reward, timesteps


def main(num_episodes=NUM_EPISODES):
    """Random search over linear policy parameters.

    For each episode, draws 4 parameters uniformly from [-1, 1), plays one
    episode with them, and remembers the parameters that achieved the
    highest cumulative reward.

    Returns:
        A ``(optimal_params, max_reward)`` tuple. ``optimal_params`` stays
        at zeros if no episode scored above 0.
    """
    env = gym.make('CartPole-v0')
    env = wrappers.Monitor(env, MONITOR_DIR, force=True)
    max_reward = 0
    optimal_params = np.zeros(4)
    try:
        for episode in range(num_episodes):
            params = 2 * np.random.rand(4) - 1
            net_reward, timesteps = run_episode(env, params)
            print("Episode %d finished after %d timesteps, reward = %d"
                  % (episode, timesteps, net_reward))
            if net_reward > max_reward:
                max_reward = net_reward
                optimal_params = params
    finally:
        # Close the wrapped environment even if an episode raises.
        env.close()
    return optimal_params, max_reward


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment