# Skip to content
#
# Instantly share code, notes, and snippets.
#
# What would you like to do?
# CartPole Checkpoint 3
import gym
import numpy as np
# Create the CartPole-v1 environment that is passed to play() further down.
env = gym.make('CartPole-v1')
def play(env, policy, max_steps=5000):
    """Run one episode of ``env`` under a linear policy.

    Args:
        env: Environment object. ``env.reset()`` must return an observation
            that has a ``tolist()`` method, and ``env.step(action)`` must
            return an ``(observation, reward, done, info)`` 4-tuple.
        policy: Weights combined with each observation through ``np.dot``.
            A result greater than 0 selects action 1, otherwise action 0.
        max_steps: Upper bound on the number of loop iterations. Defaults to
            5000, the value that used to be hard-coded.

    Returns:
        A ``(score, observations)`` tuple: the sum of all rewards received
        and the list of recorded observations (each converted with
        ``tolist()``) for normalization and replay.
    """
    observation = env.reset()
    done = False
    score = 0
    observations = []

    for _ in range(max_steps):
        # Record before checking ``done`` so that the observation returned
        # by the terminating step is part of the replay as well.
        observations.append(observation.tolist())

        # If the simulation was over last iteration, exit the loop.
        if done:
            break

        # Pick an action according to the policy matrix.
        outcome = np.dot(policy, observation)
        action = 1 if outcome > 0 else 0

        # Make the action, record the reward.
        observation, reward, done, _ = env.step(action)
        score += reward

    return score, observations
# Random search over policies: ``max`` holds the best
# (score, observations, policy) triple seen so far.
# NOTE: this name shadows the builtin ``max``; it is kept unchanged because
# data() reads ``max[1]``.
max = (0, [], [])

# We changed the next two lines!
for _ in range(100):
    # rand() samples from [0, 1); shifting by 0.5 centres the weights on zero
    # so that they can be negative as well as positive.
    policy = np.random.rand(1, 4) - 0.5
    score, observations = play(env, policy)

    # Keep this trial only if it beats the best score so far.
    if score > max[0]:
        max = (score, observations, policy)

print('Max Score', max[0])
from flask import Flask
import json
# Flask application object; static_folder='.' is the folder that
# app.send_static_file() in root() serves index.html from.
app = Flask(__name__, static_folder='.')
# NOTE(review): the route decorator was missing from the source, so this view
# was never registered; the '/data' path is presumed from the function name --
# confirm against whatever index.html requests.
@app.route('/data')
def data():
    """Return the recorded observations of the best episode as a JSON string."""
    return json.dumps(max[1])
# NOTE(review): the route decorator was missing from the source; '/' is
# presumed from the function name -- confirm.
@app.route('/')
def root():
    """Serve index.html from the application's static folder."""
    return app.send_static_file('./index.html')


# Start the development server once the search above has finished.
# NOTE(review): the host literal was lost in the source (only '' remained);
# '0.0.0.0' is presumed, which listens on all network interfaces -- confirm,
# or use '127.0.0.1' to restrict the server to the local machine.
app.run(host='0.0.0.0', port=3000)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment