# anlsh/test2.py

## test2.py
import tensorflow as tf
import numpy as np
import gym


def ff_network(name, layer_dims):
    """Build a fully connected feed-forward network in the default TF graph.

    Args:
        name: Prefix for the variable names. Layer ``i`` (1-based) gets a
            weight variable named ``name + str(i)`` and a bias variable
            named ``name + str(i) + "bias"``.
        layer_dims: Sequence of layer widths. ``layer_dims[0]`` is the input
            width and the last entry is the output width.

    Returns:
        A ``(layers, params)`` pair. ``layers[0]`` is the input placeholder
        of shape ``[None, layer_dims[0]]`` and ``layers[i]`` is the output
        tensor of layer ``i``. ``params`` lists every weight variable in
        layer order, followed by every bias variable in layer order.
    """
    layers = [tf.placeholder('float', [None, layer_dims[0]])]
    weights = []
    biases = []

    # Count from 1 so that `i` indexes `layer_dims` directly and
    # layer_dims[i - 1] is the width of this layer's input.
    for i, width in enumerate(layer_dims[1:], start=1):
        layer_weights = tf.get_variable(name + str(i),
                                        [layer_dims[i - 1], width])
        layer_bias = tf.get_variable(name + str(i) + "bias", [width])
        layer = tf.matmul(layers[-1], layer_weights) + layer_bias

        # Every layer except the final (output) one gets a sigmoid.
        if i != len(layer_dims) - 1:
            layer = tf.nn.sigmoid(layer)

        weights.append(layer_weights)
        biases.append(layer_bias)
        layers.append(layer)

    return layers, weights + biases

def sample_trajectory(env, actor, max_depth=1000, render=False):
    """Roll out one episode in ``env`` by following ``actor``.

    Args:
        env: Environment providing ``reset()``, ``render()`` and
            ``step(action)`` returning ``(obs, reward, done, info)``.
        actor: Callable mapping an observation to an action.
        max_depth: Maximum number of environment steps to take.
        render: When true, ``env.render()`` is called before every step.

    Returns:
        The trajectory as a list holding one
        ``(obs, action, reward, next_obs, done)`` tuple per step taken, in
        order. Observations are 1-D arrays with the same length as the
        observation returned by ``env.reset()``.
    """
    obs = env.reset()
    state_dims = len(obs)

    # Some environments have a tendency to randomly start outputting
    # states as column vectors instead of rows. Coerce every observation,
    # including the initial one, so the actor always sees the same shape.
    obs = np.reshape(obs, (state_dims,))

    trajectory = []
    done = False

    while len(trajectory) < max_depth and not done:
        if render:
            env.render()

        action = actor(obs)
        next_obs, reward, done, _info = env.step(action)
        next_obs = np.reshape(next_obs, (state_dims,))

        trajectory.append((obs, action, reward, next_obs, done))
        obs = next_obs

    return trajectory


def go(env, scope="scope"):
    """Drive ``env`` forever with a freshly initialised, noisy actor network.

    Builds a feed-forward actor with one hidden layer of 10 units via
    ``ff_network``, initialises its variables, and then repeatedly samples
    rendered trajectories of at most 1000 steps. Gaussian noise with
    standard deviation 0.1 is added to every action. No training happens.

    Args:
        env: Environment whose ``reset()`` returns an observation with a
            length and whose ``action_space.shape[0]`` gives the action
            width.
        scope: Name of the TensorFlow variable scope holding the actor's
            variables (opened with ``reuse=tf.AUTO_REUSE``).

    This function never returns; it only exits through an exception
    (for example ``KeyboardInterrupt``).
    """
    actor_hidden_dims = [10]

    # Exploration standard deviation
    action_stdv = .1
    max_trajectory_depth = 1000

    render = True

    state_dims = len(env.reset())
    action_dims = env.action_space.shape[0]

    # Layer widths: input, hidden layers, output.
    actor_nn_dims = [state_dims] + actor_hidden_dims + [action_dims]

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        actor_nn, _ = ff_network("actor_nn", actor_nn_dims)

    state_input = actor_nn[0]
    action_output = actor_nn[-1]

    # Build the exploration-noise op exactly once. Creating these ops inside
    # act_with_noise would add new nodes to the graph on every single step,
    # so the graph (and memory use) would grow without bound.
    noisy_action = action_output + tf.random_normal(tf.shape(action_output),
                                                    stddev=action_stdv)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        def act_with_noise(state):
            # Feed a batch of one state; the noise is re-sampled on each run.
            return sess.run(noisy_action, {state_input: [state]})

        while True:
            sample_trajectory(env, act_with_noise,
                              max_trajectory_depth, render)

if __name__ == "__main__":
    env = gym.make('Pendulum-v0')
    try:
        go(env)
    finally:
        # go() only exits through an exception (e.g. Ctrl-C); release the
        # environment's resources on the way out.
        env.close()
	import tensorflow as tf
	import numpy as np
	import gym


	def ff_network(name, layer_dims):
	    """Build a fully connected feed-forward network in the default TF graph.

	    Args:
	        name: Prefix for the variable names. Layer ``i`` (1-based) gets a
	            weight variable named ``name + str(i)`` and a bias variable
	            named ``name + str(i) + "bias"``.
	        layer_dims: Sequence of layer widths. ``layer_dims[0]`` is the
	            input width and the last entry is the output width.

	    Returns:
	        A ``(layers, params)`` pair. ``layers[0]`` is the input
	        placeholder of shape ``[None, layer_dims[0]]`` and ``layers[i]``
	        is the output tensor of layer ``i``. ``params`` lists every
	        weight variable in layer order, followed by every bias variable
	        in layer order.
	    """
	    layers = [tf.placeholder('float', [None, layer_dims[0]])]
	    weights = []
	    biases = []

	    # Count from 1 so that `i` indexes `layer_dims` directly and
	    # layer_dims[i - 1] is the width of this layer's input.
	    for i, width in enumerate(layer_dims[1:], start=1):
	        layer_weights = tf.get_variable(name + str(i),
	                                        [layer_dims[i - 1], width])
	        layer_bias = tf.get_variable(name + str(i) + "bias", [width])
	        layer = tf.matmul(layers[-1], layer_weights) + layer_bias

	        # Every layer except the final (output) one gets a sigmoid.
	        if i != len(layer_dims) - 1:
	            layer = tf.nn.sigmoid(layer)

	        weights.append(layer_weights)
	        biases.append(layer_bias)
	        layers.append(layer)

	    return layers, weights + biases

	def sample_trajectory(env, actor, max_depth=1000, render=False):
	    """Roll out one episode in ``env`` by following ``actor``.

	    Args:
	        env: Environment providing ``reset()``, ``render()`` and
	            ``step(action)`` returning ``(obs, reward, done, info)``.
	        actor: Callable mapping an observation to an action.
	        max_depth: Maximum number of environment steps to take.
	        render: When true, ``env.render()`` is called before every step.

	    Returns:
	        The trajectory as a list holding one
	        ``(obs, action, reward, next_obs, done)`` tuple per step taken,
	        in order. Observations are 1-D arrays with the same length as
	        the observation returned by ``env.reset()``.
	    """
	    obs = env.reset()
	    state_dims = len(obs)

	    # Some environments have a tendency to randomly start outputting
	    # states as column vectors instead of rows. Coerce every observation,
	    # including the initial one, so the actor always sees the same shape.
	    obs = np.reshape(obs, (state_dims,))

	    trajectory = []
	    done = False

	    while len(trajectory) < max_depth and not done:
	        if render:
	            env.render()

	        action = actor(obs)
	        next_obs, reward, done, _info = env.step(action)
	        next_obs = np.reshape(next_obs, (state_dims,))

	        trajectory.append((obs, action, reward, next_obs, done))
	        obs = next_obs

	    return trajectory


	def go(env, scope="scope"):
	    """Drive ``env`` forever with a freshly initialised, noisy actor network.

	    Builds a feed-forward actor with one hidden layer of 10 units via
	    ``ff_network``, initialises its variables, and then repeatedly samples
	    rendered trajectories of at most 1000 steps. Gaussian noise with
	    standard deviation 0.1 is added to every action. No training happens.

	    Args:
	        env: Environment whose ``reset()`` returns an observation with a
	            length and whose ``action_space.shape[0]`` gives the action
	            width.
	        scope: Name of the TensorFlow variable scope holding the actor's
	            variables (opened with ``reuse=tf.AUTO_REUSE``).

	    This function never returns; it only exits through an exception
	    (for example ``KeyboardInterrupt``).
	    """
	    actor_hidden_dims = [10]

	    # Exploration standard deviation
	    action_stdv = .1
	    max_trajectory_depth = 1000

	    render = True

	    state_dims = len(env.reset())
	    action_dims = env.action_space.shape[0]

	    # Layer widths: input, hidden layers, output.
	    actor_nn_dims = [state_dims] + actor_hidden_dims + [action_dims]

	    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
	        actor_nn, _ = ff_network("actor_nn", actor_nn_dims)

	    state_input = actor_nn[0]
	    action_output = actor_nn[-1]

	    # Build the exploration-noise op exactly once. Creating these ops
	    # inside act_with_noise would add new nodes to the graph on every
	    # single step, so the graph (and memory use) would grow without bound.
	    noisy_action = action_output + tf.random_normal(tf.shape(action_output),
	                                                    stddev=action_stdv)

	    with tf.Session() as sess:

	        sess.run(tf.global_variables_initializer())

	        def act_with_noise(state):
	            # Feed a batch of one state; the noise is re-sampled on each run.
	            return sess.run(noisy_action, {state_input: [state]})

	        while True:
	            sample_trajectory(env, act_with_noise,
	                              max_trajectory_depth, render)

	if __name__ == "__main__":
	    env = gym.make('Pendulum-v0')
	    try:
	        go(env)
	    finally:
	        # go() only exits through an exception (e.g. Ctrl-C); release the
	        # environment's resources on the way out.
	        env.close()