Sunny Guha sol0invictus

% Network: 2-dimensional observation input, 3 outputs (one per discrete action)
layers = [
    featureInputLayer(2,"Name","input_1")
    fullyConnectedLayer(24,"Name","fc_1")
    reluLayer("Name","relu_1")
    fullyConnectedLayer(48,"Name","fc_2")
    reluLayer("Name","relu_2")
    fullyConnectedLayer(3,"Name","output")
    regressionLayer("Name","RepresentationLoss")];
classdef mountain_car_1 < rl.env.MATLABEnvironment
    properties
        % Wrapped OpenAI Gym environment
        open_env = py.gym.make('MountainCar-v0');
    end
    methods
        function this = mountain_car_1()
            % Observation space: position and velocity
            ObservationInfo = rlNumericSpec([2 1]);
            ObservationInfo.Name = 'MountainCar Discrete';
            ObservationInfo.Description = 'Position, Velocity';
            % Three discrete actions: push left (0), no push (1), push right (2)
            ActionInfo = rlFiniteSetSpec([0 1 2]);
@sol0invictus
sol0invictus / DDPG-buffer.py
Created May 30, 2020 18:38
DDPG - Replay Buffer
import numpy as np

class BasicBuffer:
    def __init__(self, size, obs_dim, act_dim):
        # Pre-allocated storage for transitions (s, s', a, r, done)
        self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float32)
        self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float32)
        self.acts_buf = np.zeros([size, act_dim], dtype=np.float32)
        self.rews_buf = np.zeros([size], dtype=np.float32)
        self.done_buf = np.zeros([size], dtype=np.float32)
        # ptr: next write index, size: current fill level, max_size: capacity
        self.ptr, self.size, self.max_size = 0, 0, size
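The update snippet further down calls replay_buffer.sample(batch_size) and unpacks X, A, R, X2, D, but the preview cuts off before those methods. Here is a minimal sketch of the store/sample pair such a buffer typically provides; the method names and return order are inferred from that unpacking and are not shown in the gist.

    def store(self, obs, act, rew, next_obs, done):
        # Write the transition at ptr, overwriting the oldest entry when full
        self.obs1_buf[self.ptr] = obs
        self.obs2_buf[self.ptr] = next_obs
        self.acts_buf[self.ptr] = act
        self.rews_buf[self.ptr] = rew
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    def sample(self, batch_size=32):
        # Uniform random minibatch; returns (states, actions, rewards, next states, dones)
        idxs = np.random.randint(0, self.size, size=batch_size)
        return (self.obs1_buf[idxs], self.acts_buf[idxs], self.rews_buf[idxs],
                self.obs2_buf[idxs], self.done_buf[idxs])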
@sol0invictus
sol0invictus / DDPG-update.py
Created May 30, 2020 17:00
DDPG - target update
## Updating both target networks
## Updating the critic (Q) target: polyak average of target and main weights
temp1 = np.array(q_mu_target.get_weights())
temp2 = np.array(q_mu.get_weights())
temp3 = decay*temp1 + (1-decay)*temp2
q_mu_target.set_weights(temp3)
# Updating the actor target network in the same way
temp1 = np.array(mu_target.get_weights())
# Sample a minibatch: states, actions, rewards, next states, done flags
X, A, R, X2, D = replay_buffer.sample(batch_size)
X = np.asarray(X, dtype=np.float32)
A = np.asarray(A, dtype=np.float32)
R = np.asarray(R, dtype=np.float32)
X2 = np.asarray(X2, dtype=np.float32)
D = np.asarray(D, dtype=np.float32)
Xten = tf.convert_to_tensor(X)
# Actor optimization
with tf.GradientTape() as tape2:
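    # Sketch (assumption): the preview ends at the tape above. A standard DDPG actor
    # step maximizes Q(s, mu(s)); mu_optimizer is an assumed tf.keras optimizer that
    # the preview does not show.
    A_mu = mu(Xten)
    Q_mu_val = q_mu(tf.concat([Xten, A_mu], axis=1))
    mu_loss = -tf.reduce_mean(Q_mu_val)
grads_mu = tape2.gradient(mu_loss, mu.trainable_variables)
mu_optimizer.apply_gradients(zip(grads_mu, mu.trainable_variables))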
@sol0invictus
sol0invictus / ddpg-modearch.py
Created May 29, 2020 18:23
Actor and Critic Models for DDPG Blog
# Network parameters
X_shape = (num_states,)                  # trailing comma: one-element shape tuple
QA_shape = (num_states + num_actions,)   # critic input: state concatenated with action
hidden_sizes_1 = (1000, 500, 200)
hidden_sizes_2 = (400, 200)
# Main network outputs
mu = ANN2(X_shape, list(hidden_sizes_1) + [num_actions], hidden_activation='relu', output_activation='tanh')
q_mu = ANN2(QA_shape, list(hidden_sizes_2) + [1], hidden_activation='relu')
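The target-update gist earlier references mu_target and q_mu_target, which this preview does not show being created. A minimal sketch, assuming the targets are clones of the main networks built with the same ANN2 helper and initialized to identical weights:

# Assumed construction of the target networks (names taken from the update gist)
mu_target = ANN2(X_shape, list(hidden_sizes_1) + [num_actions],
                 hidden_activation='relu', output_activation='tanh')
q_mu_target = ANN2(QA_shape, list(hidden_sizes_2) + [1], hidden_activation='relu')
mu_target.set_weights(mu.get_weights())
q_mu_target.set_weights(q_mu.get_weights())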
@sol0invictus
sol0invictus / ddpg-model.py
Created May 29, 2020 17:42
DDPG Blog - Model generation function
import tensorflow as tf

def ANN2(input_shape, layer_sizes, hidden_activation='relu', output_activation=None):
    # Simple fully connected network: hidden layers followed by one output layer
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=input_shape))
    for h in layer_sizes[:-1]:
        model.add(tf.keras.layers.Dense(units=h, activation=hidden_activation))
    model.add(tf.keras.layers.Dense(units=layer_sizes[-1], activation=output_activation))
    return model
# We will look at two different implementations.
# The first one is straightforward.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D

model = Sequential()
model.add(Conv2D(100, 3, padding='valid', activation='relu', strides=1, input_shape=(52, 52, 1)))
model.add(Conv2D(1, 1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
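To sanity-check the shapes, a quick run on random data works (the data below is purely illustrative): the 'valid' 3x3 convolution shrinks the 52x52 input to 50x50, so the per-pixel targets must be 50x50x1.

import numpy as np

# Illustrative smoke test with random inputs and binary 50x50 masks as targets
X = np.random.rand(8, 52, 52, 1).astype('float32')
Y = np.random.randint(0, 2, size=(8, 50, 50, 1)).astype('float32')
model.fit(X, Y, epochs=1, batch_size=4)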
import tensorflow as tf

def get_grad(inputs, targets, parameters):
    with tf.GradientTape() as tape:
        # Forward pass and loss (forward_propagation and compute_cost are defined elsewhere)
        Z3 = forward_propagation(inputs, parameters)
        loss_value = compute_cost(Z3, targets)
    # Return the gradients w.r.t. the parameter tensors together with the loss
    return [tape.gradient(loss_value, list(parameters.values())), loss_value]
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    tf.random.set_seed(1)        # to keep consistent results
    seed = 3                     # to keep consistent results
    (n_x, m) = X_train.shape     # n_x: input size, m: number of examples in the train set
    n_y = Y_train.shape[0]       # n_y: output size
    costs = []                   # to keep track of the cost
    # Create Placeholders of shape (n_x, n_y)
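    # Hypothetical continuation (assumption): the preview stops above. If the rest of
    # the function follows the eager-mode get_grad helper, a minibatch training loop
    # could look like the sketch below; initialize_parameters and the Adam optimizer
    # are assumed and not shown in the gist.
    parameters = initialize_parameters()              # assumed helper returning a dict of tf.Variables
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    for epoch in range(num_epochs):
        epoch_cost = 0.0
        num_minibatches = m // minibatch_size
        for k in range(num_minibatches):
            # Examples are stored as columns, so slice along axis 1
            mb_X = X_train[:, k * minibatch_size:(k + 1) * minibatch_size]
            mb_Y = Y_train[:, k * minibatch_size:(k + 1) * minibatch_size]
            grads, loss_value = get_grad(mb_X, mb_Y, parameters)
            optimizer.apply_gradients(zip(grads, list(parameters.values())))
            epoch_cost += loss_value / num_minibatches
        if print_cost and epoch % 100 == 0:
            print(f"Cost after epoch {epoch}: {epoch_cost}")
            costs.append(epoch_cost)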