import tensorflow as tf
import numpy as np
import gym


def ff_network(name, layer_dims):
    """Build a fully-connected network with sigmoid hidden activations.

    layer_dims lists the width of every layer, input first. Returns the
    list of layer tensors (layers[0] is the input placeholder) and a flat
    list of the trainable weight and bias variables.
    """
    layers = [None] * len(layer_dims)
    layers[0] = tf.placeholder(tf.float32, [None, layer_dims[0]])
    layer_weight_list = [None] * (len(layer_dims) - 1)
    layer_bias_list = [None] * len(layer_weight_list)

    for i, width in enumerate(layer_dims[1:]):
        # enumerate starts at 0, but layer 0 is the input; correct the index
        i += 1
        layer_weights = tf.get_variable(name + str(i),
                                        [layer_dims[i - 1], width])
        layer_bias = tf.get_variable(name + str(i) + "bias", [width])
        layer = tf.matmul(layers[i - 1], layer_weights) + layer_bias
        # No activation on the output layer
        if i != len(layer_dims) - 1:
            layer = tf.nn.sigmoid(layer)
        layer_weight_list[i - 1] = layer_weights
        layer_bias_list[i - 1] = layer_bias
        layers[i] = layer

    return layers, layer_weight_list + layer_bias_list
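

# Usage sketch (hypothetical shapes, not part of the original gist): for a
# 3-dimensional observation and a 1-dimensional action, as in Pendulum-v0,
# the call below would create two weight matrices and two bias vectors.
def _ff_network_demo():
    layers, params = ff_network("demo_nn", [3, 10, 1])
    assert layers[0].shape.as_list() == [None, 3]   # input placeholder
    assert layers[-1].shape.as_list() == [None, 1]  # linear output layer
    assert len(params) == 4  # 2 weight matrices + 2 bias vectors
    return layers, params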


def sample_trajectory(env, actor, max_depth=1000, render=False):
    """
    Given an environment 'env', a function actor: state -> action, and a
    maximum number of steps, roll out one trajectory under the actor.
    """
    step = 0
    done = False
    obs = env.reset()
    state_dims = len(obs)

    while step < max_depth and not done:
        if render:
            env.render()
        action = actor(obs)
        obs, reward, done, info = env.step(action)
        # Some environments occasionally return states as column vectors
        # instead of rows; coerce to a flat vector to avoid that
        obs = np.reshape(obs, (state_dims,))
        step += 1
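

# Usage sketch (not in the original gist): any state -> action callable works
# as the actor, e.g. one that ignores the state and samples uniformly from
# the action space.
def _random_rollout(env, max_depth=1000):
    sample_trajectory(env, lambda obs: env.action_space.sample(), max_depth)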


def go(env, scope="scope"):
    actor_hidden_dims = [10]
    # Exploration standard deviation
    action_stdv = .1
    max_trajectory_depth = 1000
    render = True

    state_dims = len(env.reset())
    action_dims = env.action_space.shape[0]
    # Full layer widths: input, hidden layers, output
    actor_nn_dims = [state_dims] + actor_hidden_dims + [action_dims]

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        actor_nn, actor_weights = ff_network("actor_nn", actor_nn_dims)
        # Build the noise op once up front: constructing it inside
        # act_with_noise would add new ops to the graph on every call
        noisy_action = actor_nn[-1] + tf.random_normal(tf.shape(actor_nn[-1]),
                                                       stddev=action_stdv)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        def act_with_noise(state):
            # Feed a batch of one state; returns a batch of one action
            return sess.run(noisy_action, {actor_nn[0]: [state]})

        while True:
            sample_trajectory(env, act_with_noise,
                              max_trajectory_depth, render)
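

# Sketch of the exploration scheme above in plain numpy (an assumed
# equivalent, for illustration only): the deterministic policy output is
# perturbed with zero-mean Gaussian noise of standard deviation action_stdv.
def _noisy(action, stdv=0.1):
    return action + np.random.normal(scale=stdv, size=np.shape(action))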


if __name__ == "__main__":
    env = gym.make('Pendulum-v0')
    go(env)