MikeShi42/main.py

## main.py
import gym
import numpy as np

# Module-level CartPole-v1 environment, created once via gym.make.
env = gym.make('CartPole-v1')

def play(env, policy, max_steps=5000):
  """Run one episode with a linear policy; return its score and trajectory.

  At every step the action is 1 when ``np.dot(policy, observation)`` is
  positive and 0 otherwise.

  Both environment calling conventions are accepted:
    * classic Gym: ``reset() -> obs`` and
      ``step() -> (obs, reward, done, info)``
    * Gym >= 0.26 / Gymnasium: ``reset() -> (obs, info)`` and
      ``step() -> (obs, reward, terminated, truncated, info)``

  Args:
    env: Environment object exposing ``reset()`` and ``step(action)``.
    policy: Weights combined with each observation through ``np.dot``.
    max_steps: Upper bound on loop iterations (default 5000, the value
      that used to be hard-coded).

  Returns:
    A ``(score, observations)`` tuple. ``score`` is the sum of the rewards
    received; ``observations`` is a list with every recorded observation
    converted to a plain Python list. When the environment reports the
    episode is over, the terminal observation is included as the last entry.
  """
  reset_result = env.reset()
  # Newer Gym releases return ``(observation, info)`` from reset(); the
  # classic API returns the observation alone.
  if (isinstance(reset_result, tuple) and len(reset_result) == 2
      and isinstance(reset_result[1], dict)):
    observation = reset_result[0]
  else:
    observation = reset_result

  done = False
  score = 0
  observations = []

  for _ in range(max_steps):
    # Record before the done check so the terminal observation is kept
    # for normalization and replay.
    observations.append(np.asarray(observation).tolist())

    if done:  # The previous step ended the episode.
      break

    # Pick an action according to the policy matrix.
    outcome = np.dot(policy, observation)
    action = 1 if outcome > 0 else 0

    # Take the action and accumulate the reward.
    step_result = env.step(action)
    if len(step_result) == 5:
      # New API splits the end-of-episode flag in two.
      observation, reward, terminated, truncated, _ = step_result
      done = terminated or truncated
    else:
      observation, reward, done, _ = step_result
    score += reward

  return score, observations
	import gym
	import numpy as np

	# Module-level CartPole-v1 environment, created once via gym.make.
	env = gym.make('CartPole-v1')

	def play(env, policy, max_steps=5000):
		"""Run one episode with a linear policy; return its score and trajectory.

		At every step the action is 1 when ``np.dot(policy, observation)`` is
		positive and 0 otherwise.

		Both environment calling conventions are accepted:
		  * classic Gym: ``reset() -> obs`` and
		    ``step() -> (obs, reward, done, info)``
		  * Gym >= 0.26 / Gymnasium: ``reset() -> (obs, info)`` and
		    ``step() -> (obs, reward, terminated, truncated, info)``

		Args:
		  env: Environment object exposing ``reset()`` and ``step(action)``.
		  policy: Weights combined with each observation through ``np.dot``.
		  max_steps: Upper bound on loop iterations (default 5000, the value
		    that used to be hard-coded).

		Returns:
		  A ``(score, observations)`` tuple. ``score`` is the sum of the
		  rewards received; ``observations`` is a list with every recorded
		  observation converted to a plain Python list. When the environment
		  reports the episode is over, the terminal observation is included
		  as the last entry.
		"""
		reset_result = env.reset()
		# Newer Gym releases return ``(observation, info)`` from reset(); the
		# classic API returns the observation alone.
		if (isinstance(reset_result, tuple) and len(reset_result) == 2
				and isinstance(reset_result[1], dict)):
			observation = reset_result[0]
		else:
			observation = reset_result

		done = False
		score = 0
		observations = []

		for _ in range(max_steps):
			# Record before the done check so the terminal observation is kept
			# for normalization and replay.
			observations.append(np.asarray(observation).tolist())

			if done:  # The previous step ended the episode.
				break

			# Pick an action according to the policy matrix.
			outcome = np.dot(policy, observation)
			action = 1 if outcome > 0 else 0

			# Take the action and accumulate the reward.
			step_result = env.step(action)
			if len(step_result) == 5:
				# New API splits the end-of-episode flag in two.
				observation, reward, terminated, truncated, _ = step_result
				done = terminated or truncated
			else:
				observation, reward, done, _ = step_result
			score += reward

		return score, observations