Yang Gu greeness

## pg-pong.py
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym

# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward

## gist:1498382
import numpy
import math

# LSH signature generation using random projection
def get_signature(user_vector, rand_proj):
    res = 0
    for p in (rand_proj):
        res = res << 1
        val = numpy.dot(p, user_vector)
        if val >= 0:
	""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
	import numpy as np
	import cPickle as pickle
	import gym

	# hyperparameters
	H = 200 # number of hidden layer neurons
	batch_size = 10 # every how many episodes to do a param update?
	learning_rate = 1e-4
	gamma = 0.99 # discount factor for reward
	import numpy
	import math

	# LSH signature generation using random projection
	def get_signature(user_vector, rand_proj):
	res = 0
	for p in (rand_proj):
	res = res << 1
	val = numpy.dot(p, user_vector)
	if val >= 0: