Sam Greydanus

## pg-pong.py
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym

# hyperparameters

# number of hidden layer neurons
H = 200

# discount factor for reward
gamma = 0.99

# a parameter update is performed once every `batch_size` episodes
batch_size = 10
learning_rate = 0.0001

## sa.py
import numpy as np

class Annealer():

    def __init__(self, step_function, energy_function):
        self.step_function = step_function
        self.energy_function = energy_function

    def run(
        self, state, temperature, room_temperature, cooling_factor
	""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
	import numpy as np
	import cPickle as pickle
	import gym

	# hyperparameters
	H = 200 # number of hidden layer neurons
	batch_size = 10 # every how many episodes to do a param update?
	learning_rate = 1e-4
	gamma = 0.99 # discount factor for reward
	import numpy as np

	class Annealer():

	def __init__(self, step_function, energy_function):
	self.step_function = step_function
	self.energy_function = energy_function

	def run(
	self, state, temperature, room_temperature, cooling_factor