Masanori Yamada MasanoriYamada

## torch_jacobian.py
def get_jacobian(net, x, noutputs):
    x = x.squeeze()
    n = x.size()[0]
    x = x.repeat(noutputs, 1)
    x.requires_grad_(True)
    y = net(x)
    y.backward(torch.eye(noutputs))
    return x.grad.data

## pg-pong.py
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym

# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward
	def get_jacobian(net, x, noutputs):
	x = x.squeeze()
	n = x.size()[0]
	x = x.repeat(noutputs, 1)
	x.requires_grad_(True)
	y = net(x)
	y.backward(torch.eye(noutputs))
	return x.grad.data
	""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
	import numpy as np
	import cPickle as pickle
	import gym

	# hyperparameters
	H = 200 # number of hidden layer neurons
	batch_size = 10 # every how many episodes to do a param update?
	learning_rate = 1e-4
	gamma = 0.99 # discount factor for reward