baoblackcoal baoblackcoal

## pg-cartpole.py
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym

# hyperparameters
H = 200  # number of hidden layer neurons
batch_size = 10  # every how many episodes to do a param update?
learning_rate = 1e-1
gamma = 0.99 # discount factor for reward

## README
For part 1 of https://openai.com/requests-for-research/#cartpole

Run it from the command line, for example:

python environment.py --env CartPole-v0 --threshold 200  --noise 33
python environment.py --env Hoodle-v0 --threshold 1  --noise 1
(Note: Hoodle-v0 is an environment I created myself; I have submitted a pull request for it: https://github.com/openai/gym/pull/227 )

## random_agent
..

## gist:f28571bc6467bc3e94a4
"""
This is a batched LSTM forward and backward pass
"""
import numpy as np
import code

class LSTM:

  @staticmethod
  def init(input_size, hidden_size, fancy_forget_bias_init = 3):

## gist:0134129d24cbe02615e9
GET /file HTTP/1.0
Host: 192.168.123.1:3001
Connection: keep-alive
Cache-Control: no-cache
User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36
Accept: */*
Authorization: token
Accept-Encoding: gzip,deflate,sdch
Accept-Language: zh-CN,zh;q=0.8
	""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
	import numpy as np
	import cPickle as pickle
	import gym

	# hyperparameters
	H = 200 # number of hidden layer neurons
	batch_size = 10 # every how many episodes to do a param update?
	learning_rate = 1e-1
	gamma = 0.99 # discount factor for reward
	For part 1 of https://openai.com/requests-for-research/#cartpole

	Run it from the command line, for example:

	python environment.py --env CartPole-v0 --threshold 200 --noise 33
	python environment.py --env Hoodle-v0 --threshold 1 --noise 1
	(Note: Hoodle-v0 is an environment I created myself; I have submitted a pull request for it: https://github.com/openai/gym/pull/227 )
	"""
	This is a batched LSTM forward and backward pass
	"""
	import numpy as np
	import code

	class LSTM:

	@staticmethod
	def init(input_size, hidden_size, fancy_forget_bias_init = 3):
	GET /file HTTP/1.0
	Host: 192.168.123.1:3001
	Connection: keep-alive
	Cache-Control: no-cache
	User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36
	Accept: */*
	Authorization: token
	Accept-Encoding: gzip,deflate,sdch
	Accept-Language: zh-CN,zh;q=0.8