Skip to content

Instantly share code, notes, and snippets.



Created Sep 18, 2018
What would you like to do?
CartPole Checkpoint 2
import gym
import numpy as np
env = gym.make('CartPole-v1')
def play(env, policy):
observation = env.reset()
done = False
score = 0
observations = []
for _ in range(5000):
observations += [observation.tolist()] # Record the observations for normalization and replay
if done: # If the simulation was over last iteration, exit loop
# Pick an action according to the policy matrix
outcome =, observation)
action = 1 if outcome > 0 else 0
# Make the action, record reward
observation, reward, done, info = env.step(action)
score += reward
return score, observations
max = (0, [], [])
for _ in range(10):
policy = np.random.rand(1,4)
score, observations = play(env, policy)
if score > max[0]:
max = (score, observations, policy)
print('Max Score', max[0])
from flask import Flask
import json
app = Flask(__name__, static_folder='.')
def data():
return json.dumps(max[1])
def root():
return app.send_static_file('./index.html')'', port='3000')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.