AurelianTactics

View basic_env_ray.py
import gym
from gym_unity.envs import UnityEnv

class UnityEnvWrapper(gym.Env):
    def __init__(self, env_config):
        self.vector_index = env_config.vector_index
        self.worker_index = env_config.worker_index
        self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
        # Name of the Unity environment binary to launch
        env_name = '/home/jim/projects/unity_ray/basic_env_linux/basic_env_linux'
        self.env = UnityEnv(env_name, worker_id=self.worker_id, use_visual=False, multiagent=False, no_graphics=True)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
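
The preview ends before the rest of the wrapper (the step/reset passthroughs to self.env) and before the Ray/RLlib hookup. A minimal sketch of the registration side, assuming the wrapper also forwards step and reset to the underlying UnityEnv; the env name "unity_basic_env", the choice of PPO, and the "unity_worker_id" value are illustrative assumptions, not taken from the gist:

import ray
from ray import tune
from ray.tune.registry import register_env

# register the wrapper so RLlib can construct it per rollout worker with an env_config
register_env("unity_basic_env", lambda env_config: UnityEnvWrapper(env_config))

ray.init()
tune.run(
    "PPO",
    config={
        "env": "unity_basic_env",
        "num_workers": 1,                       # each rollout worker launches its own Unity binary
        "env_config": {"unity_worker_id": 10},  # base worker_id / port offset (illustrative)
    },
)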
View basic_env_stable_multi_env.py
from gym_unity.envs import UnityEnv
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import SubprocVecEnv

def make_env(env_id, log_dir, rank):
    def _init():
        env = UnityEnv(env_id, worker_id=rank, use_visual=False)
        env = Monitor(env, log_dir, allow_early_resets=True)
        return env
    return _init

env_id = "unity_ray/basic_env_linux/basic_env_linux"
num_env = 2
worker_id = 9  # offset so the Unity worker ports don't collide with other running envs
env = SubprocVecEnv([make_env(env_id, log_dir, i + worker_id) for i in range(num_env)])
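
For completeness, a short usage sketch of training on this vectorized env with Stable Baselines PPO2, mirroring the single-env gist below; the policy and timestep count are carried over from that gist rather than from this one:

from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy

model = PPO2(MlpPolicy, env, verbose=1)  # env is the SubprocVecEnv built above
model.learn(total_timesteps=10000)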
AurelianTactics / basic_env_stable_single_env.py
Last active Aug 25, 2019
Unity ML Basic Env Stable Baselines Implementation
View basic_env_stable_single_env.py
# create unity env
from gym_unity.envs import UnityEnv
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv

env_id = "unity_ray/basic_env_linux/basic_env_linux"
env = UnityEnv(env_id, worker_id=2, use_visual=False, no_graphics=False)

# run stable baselines
env = DummyVecEnv([lambda: env])  # the algorithms require a vectorized environment to run
model = PPO2(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=10000)
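
An illustrative follow-up, not in the gist: rolling out the trained policy in the same vectorized env for a few steps before shutting the Unity process down.

obs = env.reset()
for _ in range(100):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
env.close()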
View bcq_actor.py
# BCQ perturbation actor: takes a (state, candidate action) pair, outputs a small
# correction bounded to +/- 0.05 * max_action, then clips the perturbed action to the valid range
self.fc1_actor_ = tf.contrib.layers.fully_connected(tf.concat([self.state_, self.action_], axis=1), actor_hs_list[0], activation_fn=tf.nn.relu)  # e.g. 400 units
self.fc2_actor_ = tf.contrib.layers.fully_connected(self.fc1_actor_, actor_hs_list[1], activation_fn=tf.nn.relu)
self.fc3_actor_ = tf.contrib.layers.fully_connected(self.fc2_actor_, action_dim, activation_fn=tf.nn.tanh) * 0.05 * max_action
self.actor_clip_ = tf.clip_by_value(self.fc3_actor_ + self.action_, -max_action, max_action)
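
The layers assume placeholders and sizing variables defined earlier in the gist; a minimal sketch of those assumptions (names chosen to match the snippet, dimensions illustrative):

import tensorflow as tf

state_dim, action_dim, max_action = 3, 1, 1.0  # illustrative environment dimensions
actor_hs_list = [400, 300]                     # hidden layer widths, matching the "400" hint

# inside the actor's __init__ these are created as self.* attributes
state_ = tf.placeholder(tf.float32, [None, state_dim], name='state')
action_ = tf.placeholder(tf.float32, [None, action_dim], name='action')  # candidate action to perturb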
View tf_target_network_update.py
# copies the mainQN values to the targetQN
# from Denny Britz's excellent RL repo
# https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/Double%20DQN%20Solution.ipynb
def copy_model_parameters(sess, estimator1, estimator2):
    """
    Copies the model parameters of one estimator to another.
    Args:
      sess: Tensorflow session instance
      estimator1: Estimator to copy the parameters from
      estimator2: Estimator to copy the parameters to
    """
    e1_params = sorted([t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)], key=lambda v: v.name)
    e2_params = sorted([t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)], key=lambda v: v.name)
    update_ops = [e2_v.assign(e1_v) for e1_v, e2_v in zip(e1_params, e2_params)]
    sess.run(update_ops)
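
A usage sketch, assuming mainQN and targetQN follow the Britz Estimator pattern and expose a .scope attribute naming their variable scope:

copy_model_parameters(sess, mainQN, targetQN)  # copy online-network weights into the target network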
AurelianTactics / periodic_target_update.py
Created Jan 9, 2019
TRFL Target Network Updating: periodic_target_update.py
View periodic_target_update.py
# in graph
target_network_update_ops = trfl.periodic_target_update(targetQN.get_qnetwork_variables(),
                                                         mainQN.get_qnetwork_variables(),
                                                         update_period=2000, tau=1.0)
# in session
with tf.Session() as sess:
    # ....
    for ep in range(1, train_episodes):
        # ...
        # update target q network: run every step; the op only copies the weights once every update_period steps
        sess.run(target_network_update_ops)
AurelianTactics / update_target_network_smalltau.py
Last active Jan 9, 2019
TRFL Target Network Updating: update_target_network_smalltau.py
View update_target_network_smalltau.py
# in graph
target_network_update_ops = trfl.update_target_variables(targetQN.get_qnetwork_variables(),
                                                          mainQN.get_qnetwork_variables(), tau=1.0/2000)
# in session
with tf.Session() as sess:
    # ....
    for ep in range(1, train_episodes):
        # ...
        # update target q network: soft update toward the main network with a small tau every step
        sess.run(target_network_update_ops)
AurelianTactics / update_target_network_tau1.py
Last active Jan 9, 2019
TRFL Target Network Updating: update_target_network_tau1.py
View update_target_network_tau1.py
# in graph
# TRFL way
target_network_update_ops = trfl.update_target_variables(targetQN.get_qnetwork_variables(),
                                                          mainQN.get_qnetwork_variables(), tau=1.0)
# in session
with tf.Session() as sess:
    # ...
    for ep in range(1, train_episodes):
        # ...
        # update target q network: tau=1.0 is a hard copy, so run this only every N training steps
        sess.run(target_network_update_ops)
AurelianTactics / modify_q_network.py
Created Jan 9, 2019
TRFL Target Network Updating: Modify QNetwork
View modify_q_network.py
class QNetwork:
    def __init__(self, name, learning_rate=0.01, state_size=4,
                 action_size=2, hidden_size=10, batch_size=20):
        # same code here
        # ...

    # method to get trainable variables for TRFL
    def get_qnetwork_variables(self):
        return [t for t in tf.trainable_variables() if t.name.startswith(self.name)]
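
The name-prefix filter only works if the elided constructor code stores the name and builds its layers inside a matching variable scope. A minimal sketch of that assumption (the layer sizes and placeholder are illustrative, not the gist's actual network):

import tensorflow as tf

class QNetwork:
    def __init__(self, name, learning_rate=0.01, state_size=4,
                 action_size=2, hidden_size=10):
        self.name = name
        # building everything under tf.variable_scope(name) gives every variable a
        # "name/..." prefix, which is what get_qnetwork_variables() filters on
        with tf.variable_scope(name):
            self.inputs_ = tf.placeholder(tf.float32, [None, state_size], name='inputs')
            fc1 = tf.contrib.layers.fully_connected(self.inputs_, hidden_size)
            self.output = tf.contrib.layers.fully_connected(fc1, action_size, activation_fn=None)

    def get_qnetwork_variables(self):
        return [t for t in tf.trainable_variables() if t.name.startswith(self.name)]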
View trfl_double_q_learning.py
# TRFL qlearning
# qloss, q_learning = trfl.qlearning(self.output, self.actions_, self.reward, self.discount, self.targetQs_)
# TRFL double qlearning
qloss, q_learning = trfl.double_qlearning(self.output, self.actions_, self.reward, self.discount,
                                          self.targetQs_, self.output)
self.loss = tf.reduce_mean(qloss)
self.opt = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
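
For reference, a hedged sketch of the tensors this loss expects, following the TRFL qlearning/double_qlearning signatures (q_tm1, a_tm1, r_t, pcont_t, q_t, and for the double variant a final q_t_selector, which the gist sets to the online network's own outputs); the shapes and action_size are illustrative:

import tensorflow as tf

action_size = 2  # number of discrete actions (illustrative)

# created as self.* attributes inside the QNetwork constructor
actions_ = tf.placeholder(tf.int32, [None], name='actions')                   # a_tm1: actions taken
reward = tf.placeholder(tf.float32, [None], name='reward')                    # r_t: rewards
discount = tf.placeholder(tf.float32, [None], name='discount')                # pcont_t: gamma, 0 at episode end
targetQs_ = tf.placeholder(tf.float32, [None, action_size], name='targetQs')  # q_t: target-network Q-values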