MikeShi42/main.py Secret

## main.py
# Run the policy for at most 5000 steps, summing the reward and logging every observation.
for _ in range(5000):
    observations += [observation.tolist()] # Record the observation (converted to a plain list) for normalization and replay

    # This check comes after the append above, so the observation produced by the
    # step that set `done` is still recorded before the loop exits.
    if done: # If the simulation ended on the previous iteration, exit the loop
      break

    # Pick an action according to the policy matrix:
    # action 1 when the dot product is positive, otherwise action 0.
    outcome = np.dot(policy, observation)
    action = 1 if outcome > 0 else 0

    # Take the action and add its reward to the running score.
    # NOTE(review): unpacks a 4-tuple from env.step -- presumably the classic Gym
    # step API; confirm against the installed environment version.
    observation, reward, done, info = env.step(action)
    score += reward

  # Hand back the accumulated reward together with the recorded observations.
  return score, observations
	for _ in range(5000):
	observations += [observation.tolist()] # Record the observations for normalization and replay

	if done: # If the simulation was over last iteration, exit loop
	break

	# Pick an action according to the policy matrix
	outcome = np.dot(policy, observation)
	action = 1 if outcome > 0 else 0

	# Make the action, record reward
	observation, reward, done, info = env.step(action)
	score += reward

	return score, observations