Skip to content

Instantly share code, notes, and snippets.

@tanzhenyu
tanzhenyu / ppo_tf2.py
Last active August 23, 2019 17:56
ppo TF2
import tensorflow as tf
import gym
import numpy as np
import scipy.signal
def mlp(ob_space, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
model = tf.keras.Sequential()
for h in hidden_sizes[:-1]:
model.add(tf.keras.layers.Dense(units=h, activation=activation))
model.add(tf.keras.layers.Dense(units=hidden_sizes[-1], activation=output_activation))
@tanzhenyu
tanzhenyu / ppo_main.py
Created August 23, 2019 15:50
ppo main loop
model, env = ppo()
obs = env.reset()
reward = 0
while True:
action, _, _ = model.get_pi_logpi_vf(obs.reshape(1, -1))
obs, r, d, _ = env.step(action.numpy()[0])
reward += r
env.render()
if d:
print('episode reward {}'.format(reward))
@tanzhenyu
tanzhenyu / ppo_training_loop.py
Last active August 23, 2019 17:57
PPO training
def ppo(seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=3e-4,
vf_lr=1e-3, train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, target_kl=0.01):
tf.random.set_seed(seed)
np.random.seed(seed)
env = gym.make('CartPole-v1')
ob_space = env.observation_space
ac_space = env.action_space
obs_dim = ob_space.shape
@tanzhenyu
tanzhenyu / ppo_buffer.py
Last active August 23, 2019 15:39
PPO Buffer
def discount_cumsum(x, discount):
    """Return the discounted cumulative sum of *x* along axis 0.

    Element ``i`` of the result equals
    ``x[i] + discount * x[i + 1] + discount**2 * x[i + 2] + ...``.

    Args:
        x: Sequence or array of values; it is reversed with ``x[::-1]``.
        discount: Discount factor applied once per step into the future.

    Returns:
        Array with the same leading length as *x*.
    """
    # The IIR filter y[n] = x[n] + discount * y[n - 1] accumulates forwards,
    # so run it over the reversed input and flip the output back.
    numerator = [1]
    denominator = [1, float(-discount)]
    backwards = x[::-1]
    accumulated = scipy.signal.lfilter(numerator, denominator, backwards, axis=0)
    return accumulated[::-1]
def combined_shape(length, shape=None):
    """Prepend *length* to *shape* and return the result as a tuple.

    Args:
        length: Size of the leading dimension.
        shape: ``None`` (no extra dimensions), a scalar (one extra
            dimension), or an iterable of dimensions.

    Returns:
        ``(length,)``, ``(length, shape)`` or ``(length, *shape)``
        respectively.
    """
    leading = (length,)
    if shape is None:
        return leading
    if np.isscalar(shape):
        return leading + (shape,)
    return leading + tuple(shape)
class PPOBuffer:
@tanzhenyu
tanzhenyu / actor_critic.py
Last active August 23, 2019 17:56
PPO Actor Critic Model
def mlp(ob_space, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    """Build a fully connected ``tf.keras.Sequential`` network.

    Args:
        ob_space: Object whose ``shape`` attribute (a tuple) gives the
            per-example input shape.
        hidden_sizes: Layer widths. Every entry except the last becomes a
            Dense layer using *activation*; the last entry is the width of
            the final Dense layer. Must be non-empty, since the last entry
            is indexed.
        activation: Activation for all layers but the final one.
        output_activation: Activation for the final layer.

    Returns:
        The Sequential model, already built for inputs of shape
        ``(None,) + ob_space.shape``.
    """
    dense = tf.keras.layers.Dense
    stack = [dense(units=width, activation=activation) for width in hidden_sizes[:-1]]
    stack.append(dense(units=hidden_sizes[-1], activation=output_activation))

    network = tf.keras.Sequential(stack)
    # The leading None leaves the batch dimension unspecified.
    batched_input_shape = (None,) + ob_space.shape
    network.build(input_shape=batched_input_shape)
    return network
class MlpCategoricalActorCritic(tf.keras.Model):
@tanzhenyu
tanzhenyu / gist:48d705ba49acb0ff166da2db986745ef
Created July 19, 2018 16:35
nasnet model for model_to_estimator
# Download the data from https://www.kaggle.com/c/dogs-vs-cats/data and split it into /train/dogs and /train/cats
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
import tensorflow as tf
def test_nasnet(self):
# A gist of model cloning for Sequential models (functional model cloning should work the same way).
def to_list(x):
    """Return *x* unchanged if it is a list, otherwise wrap it in a new list."""
    return x if isinstance(x, list) else [x]
def is_keras_tensor(x):
    """Return True when *x* exposes a ``_keras_history`` attribute."""
    # A unique sentinel distinguishes "attribute missing" from an attribute
    # whose value happens to be None or otherwise falsy.
    missing = object()
    return getattr(x, '_keras_history', missing) is not missing
def clone_sequential_model(model, input_tensors=None):
def clone(layer):