tanzhenyu

## ppo_tf2.py
import tensorflow as tf
import gym
import numpy as np
import scipy.signal

def mlp(ob_space, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    """Build a fully connected Keras network sized by ``hidden_sizes``.

    Args:
        ob_space: Object exposing a ``shape`` tuple (e.g. a Gym observation
            space); it gives the per-sample input shape used to build the
            model.
        hidden_sizes: Unit counts of the Dense layers; the last entry is the
            width of the final layer.
        activation: Activation applied to every layer except the last.
        output_activation: Activation applied to the last layer.

    Returns:
        The built ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()
    for h in hidden_sizes[:-1]:
        model.add(tf.keras.layers.Dense(units=h, activation=activation))
    model.add(tf.keras.layers.Dense(units=hidden_sizes[-1], activation=output_activation))
    # Build with a free batch dimension and hand the model back; previously
    # the function fell off the end and callers received None.
    model.build(input_shape=(None,) + ob_space.shape)
    return model

## ppo_main.py
# Obtain the model and environment from ppo(), then roll the policy out
# indefinitely, rendering every step and printing each episode's total reward.
model, env = ppo()
obs = env.reset()
reward = 0
while True:
  # Reshape the observation into a single-row 2-D array before querying the model.
  action, _, _ = model.get_pi_logpi_vf(obs.reshape(1, -1))
  obs, r, d, _ = env.step(action.numpy()[0])
  reward += r
  env.render()
  if d:
    print('episode reward {}'.format(reward))
    # Start a fresh episode: stepping an environment after it reported done
    # is undefined in Gym, and the running total must not carry over.
    obs = env.reset()
    reward = 0

## ppo_training_loop.py
def ppo(seed=0,  steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=3e-4,
        vf_lr=1e-3, train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, target_kl=0.01):
    """Seed the global RNGs and create the CartPole-v1 environment.

    NOTE(review): only the setup portion of this function is visible here.
    Apart from ``seed`` none of the parameters are referenced in the visible
    body and nothing is returned, although the rollout script unpacks
    ``model, env`` from the result -- presumably the remainder of the
    function is missing; confirm against the full source.

    Args:
        seed: Seed passed to both TensorFlow's and NumPy's global RNGs.
        steps_per_epoch, epochs, gamma, clip_ratio, pi_lr, vf_lr,
        train_pi_iters, train_v_iters, lam, max_ep_len, target_kl:
            Not referenced in the visible body.
    """

    tf.random.set_seed(seed)
    np.random.seed(seed)

    env = gym.make('CartPole-v1')
    ob_space = env.observation_space
    ac_space = env.action_space
    # obs_dim holds the observation space's ``shape`` attribute.
    obs_dim = ob_space.shape

## ppo_buffer.py
def discount_cumsum(x, discount):
    """Return the discounted cumulative sums of ``x`` along axis 0.

    Element ``t`` of the result equals
    ``x[t] + discount * x[t+1] + discount**2 * x[t+2] + ...``.

    Args:
        x: Sequence or array to accumulate; it is reversed with ``[::-1]``.
        discount: Discount factor; converted to ``float`` before use.

    Returns:
        Array with the same leading length as ``x``.
    """
    # Running the first-order recursive filter y[n] = v[n] + discount * y[n-1]
    # over the reversed input accumulates from the end toward the start.
    backwards = x[::-1]
    feed_forward = [1]
    feedback = [1, float(-discount)]
    accumulated = scipy.signal.lfilter(feed_forward, feedback, backwards, axis=0)
    # Flip back so the result lines up with the original ordering.
    return accumulated[::-1]

def combined_shape(length, shape=None):
    """Prepend ``length`` to ``shape`` and return the result as a tuple.

    Args:
        length: Leading dimension.
        shape: ``None`` (no trailing dimensions), a scalar (one trailing
            dimension) or an iterable of trailing dimensions.

    Returns:
        ``(length,)``, ``(length, shape)`` or ``(length, *shape)``.
    """
    if shape is None:
        return (length,)
    if np.isscalar(shape):
        return (length, shape)
    return (length, *shape)

class PPOBuffer:

## actor_critic.py
def mlp(ob_space, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    """Assemble and build a stack of Dense layers as a Keras Sequential model.

    Args:
        ob_space: Object whose ``shape`` tuple gives the per-sample input shape.
        hidden_sizes: Unit counts per layer; the final entry sizes the last layer.
        activation: Activation for every layer except the last.
        output_activation: Activation for the last layer.

    Returns:
        The built ``tf.keras.Sequential`` model.
    """
    dense = tf.keras.layers.Dense
    net = tf.keras.Sequential()
    # All but the last size use the hidden activation.
    for width in hidden_sizes[:-1]:
        net.add(dense(units=width, activation=activation))
    # The last size gets the output activation.
    net.add(dense(units=hidden_sizes[-1], activation=output_activation))
    # Leading None leaves the batch dimension unspecified.
    batched_input_shape = (None,) + ob_space.shape
    net.build(input_shape=batched_input_shape)
    return net

class MlpCategoricalActorCritic(tf.keras.Model):

## gist:48d705ba49acb0ff166da2db986745ef
# Download the data from https://www.kaggle.com/c/dogs-vs-cats/data and split it into /train/dogs and /train/cats

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
import tensorflow as tf

def test_nasnet(self):

## gist:22fadcfda66704199a5c5d4edf10c17e
# A gist showing how to clone a Sequential model (cloning a functional model should work the same way).
def to_list(x):
    """Return ``x`` itself if it is a list, otherwise a one-element list holding ``x``."""
    return x if isinstance(x, list) else [x]
def is_keras_tensor(x):
    """Report whether ``x`` carries a ``_keras_history`` attribute."""
    # A private sentinel distinguishes "attribute absent" from any real value,
    # including None.
    absent = object()
    return getattr(x, '_keras_history', absent) is not absent
def clone_sequential_model(model, input_tensors=None):
    def clone(layer):
	import tensorflow as tf
	import gym
	import numpy as np
	import scipy.signal

	def mlp(ob_space, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
	    """Build a fully connected Keras network sized by ``hidden_sizes``.

	    Args:
	        ob_space: Object exposing a ``shape`` tuple; it gives the
	            per-sample input shape used to build the model.
	        hidden_sizes: Unit counts of the Dense layers; the last entry is
	            the width of the final layer.
	        activation: Activation applied to every layer except the last.
	        output_activation: Activation applied to the last layer.

	    Returns:
	        The built ``tf.keras.Sequential`` model.
	    """
	    model = tf.keras.Sequential()
	    for h in hidden_sizes[:-1]:
	        model.add(tf.keras.layers.Dense(units=h, activation=activation))
	    model.add(tf.keras.layers.Dense(units=hidden_sizes[-1], activation=output_activation))
	    # Build with a free batch dimension and return the model so callers
	    # do not receive None.
	    model.build(input_shape=(None,) + ob_space.shape)
	    return model
	# Obtain the model and environment from ppo(), then roll the policy out
	# indefinitely, rendering every step and printing each episode's total reward.
	model, env = ppo()
	obs = env.reset()
	reward = 0
	while True:
	    # Reshape the observation into a single-row 2-D array before querying the model.
	    action, _, _ = model.get_pi_logpi_vf(obs.reshape(1, -1))
	    obs, r, d, _ = env.step(action.numpy()[0])
	    reward += r
	    env.render()
	    if d:
	        print('episode reward {}'.format(reward))
	        # Start a fresh episode: stepping an environment after it reported
	        # done is undefined in Gym, and the total must not carry over.
	        obs = env.reset()
	        reward = 0
	def ppo(seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=3e-4,
	        vf_lr=1e-3, train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, target_kl=0.01):
	    """Seed the global RNGs and create the CartPole-v1 environment.

	    NOTE(review): only the setup portion of this function is visible.
	    Apart from ``seed`` none of the parameters are referenced in the
	    visible body and nothing is returned -- presumably the remainder of
	    the function is missing; confirm against the full source.

	    Args:
	        seed: Seed passed to both TensorFlow's and NumPy's global RNGs.
	        steps_per_epoch, epochs, gamma, clip_ratio, pi_lr, vf_lr,
	        train_pi_iters, train_v_iters, lam, max_ep_len, target_kl:
	            Not referenced in the visible body.
	    """

	    tf.random.set_seed(seed)
	    np.random.seed(seed)

	    env = gym.make('CartPole-v1')
	    ob_space = env.observation_space
	    ac_space = env.action_space
	    # obs_dim holds the observation space's ``shape`` attribute.
	    obs_dim = ob_space.shape
	def discount_cumsum(x, discount):
	    """Return the discounted cumulative sums of ``x`` along axis 0.

	    Element ``t`` of the result equals
	    ``x[t] + discount * x[t+1] + discount**2 * x[t+2] + ...``.

	    Args:
	        x: Sequence or array to accumulate; it is reversed with ``[::-1]``.
	        discount: Discount factor; converted to ``float`` before use.

	    Returns:
	        Array with the same leading length as ``x``.
	    """
	    # Filtering the reversed input with y[n] = v[n] + discount * y[n-1]
	    # accumulates from the end; the final flip restores the ordering.
	    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]

	def combined_shape(length, shape=None):
	    """Prepend ``length`` to ``shape`` and return the result as a tuple.

	    Args:
	        length: Leading dimension.
	        shape: ``None`` (no trailing dimensions), a scalar (one trailing
	            dimension) or an iterable of trailing dimensions.

	    Returns:
	        ``(length,)``, ``(length, shape)`` or ``(length, *shape)``.
	    """
	    if shape is None:
	        return (length,)
	    return (length, shape) if np.isscalar(shape) else (length, *shape)

	class PPOBuffer:
	# Download your data here: https://www.kaggle.com/c/dogs-vs-cats/data and split them into /train/dogs & /train/cats

	from keras.preprocessing.image import ImageDataGenerator
	from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
	from keras.models import Sequential
	from keras.layers import Conv2D, MaxPooling2D
	from keras.layers import Activation, Dropout, Flatten, Dense
	import tensorflow as tf

	def test_nasnet(self):
	# a gist of model cloning (sequential). (functional model cloning should be the same)
	def to_list(x):
	    """Return ``x`` itself if it is a list, otherwise a one-element list holding ``x``."""
	    if isinstance(x, list):
	        return x
	    return [x]
	def is_keras_tensor(x):
	    """Report whether ``x`` carries a ``_keras_history`` attribute."""
	    return hasattr(x, '_keras_history')
	def clone_sequential_model(model, input_tensors=None):
	def clone(layer):