Rishabh Jangir (jangirrishabh), public gists
# Alternative environments that can be swapped in:
#env = gym.make('MountainCar-v0')
#env = gym.make('CartPole-v0')
#env = gym.make('MsPacman-v0')
import gym      # get the environment from OpenAI
import curses   # for keypresses
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
%matplotlib inline
from poleCart_RL import EpisodicAgent  # get the RL agent
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """
import logging
import os
#import tempfile
import numpy as np
import gym
class EpisodicAgent(object):
    def __init__(self, action_space):
        self.action_space = action_space  # minimal stub; the full agent also keeps a memory of past states for the nearest-neighbor lookup
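A minimal sketch of how this agent might be driven on CartPole from the script shown earlier. The act(observation, reward, done) interface is an assumption for illustration, since the agent's methods are not included in this excerpt.

import gym
from poleCart_RL import EpisodicAgent

env = gym.make('CartPole-v0')
agent = EpisodicAgent(env.action_space)

for episode in range(100):
    observation = env.reset()
    reward, done = 0.0, False
    while not done:
        action = agent.act(observation, reward, done)   # assumed interface: nearest-neighbor lookup over remembered states
        observation, reward, done, _ = env.step(action)
    agent.act(observation, reward, done)                # let the agent see the terminal transition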
@jangirrishabh
jangirrishabh / ddpg1.py
Last active July 13, 2018 13:25
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.demo_batch_size = 128

def initDemoBuffer(self, demoDataFile, update_stats=True):
    """Initialize the demo buffer with the recorded demonstration data and normalize it."""

def sample_batch(self):
    if self.bc_loss:
        transitions = self.buffer.sample(self.batch_size - self.demo_batch_size)
        global demoBuffer
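        # Sketch of how the rest of the batch could be assembled (assumption: demoBuffer
        # exposes the same sample(n) interface and {key: np.array} layout as self.buffer).
        # The remaining demo_batch_size slots of every training batch are filled with
        # demonstration transitions and merged key by key with the rollout transitions.
        transitions_demo = demoBuffer.sample(self.demo_batch_size)
        for k in transitions.keys():
            transitions[k] = np.concatenate([transitions[k], transitions_demo[k]], axis=0)
    else:
        # without the behavior-cloning loss, fall back to a plain replay-buffer batch
        transitions = self.buffer.sample(self.batch_size)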
@jangirrishabh
jangirrishabh / ddpg2.py
Last active July 13, 2018 13:24
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.lambda1 = 0.001
self.lambda2 = 0.0078

def _create_network(self, reuse=False):
    mask = np.concatenate((np.zeros(self.batch_size - self.demo_batch_size), np.ones(self.demo_batch_size)), axis=0)
    target_Q_pi_tf = self.target.Q_pi_tf
    clip_range = (-self.clip_return, 0. if self.clip_pos_returns else np.inf)
    target_tf = tf.clip_by_value(batch_tf['r'] + self.gamma * target_Q_pi_tf, *clip_range)  # y = r + gamma * Q(pi)
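    # Sketch only: how lambda1, lambda2 and the demo mask above could be combined into the
    # actor loss. Names such as self.main.pi_tf, self.main.Q_tf, self.main.Q_pi_tf and
    # batch_tf['u'] are assumptions that mirror the style of the surrounding DDPG code,
    # not an exact API. The Q-filter only imitates a demonstrated action when the critic
    # rates it higher than the current policy's action.
    mask_tf = tf.constant(mask, dtype=tf.float32)                          # 1 for demo samples, 0 for rollout samples
    q_filter_tf = tf.cast(self.main.Q_tf > self.main.Q_pi_tf, tf.float32)  # Q(s, a_demo) > Q(s, pi(s))
    per_sample_bc = tf.reduce_sum(tf.square(self.main.pi_tf - batch_tf['u']), axis=1, keepdims=True)
    self.cloning_loss_tf = tf.reduce_sum(mask_tf[:, None] * q_filter_tf * per_sample_bc)
    # actor loss: the usual DDPG term plus the weighted behavior-cloning term
    self.pi_loss_tf = -self.lambda1 * tf.reduce_mean(self.main.Q_pi_tf) + self.lambda2 * self.cloning_loss_tf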
@jangirrishabh
jangirrishabh / toyCarIRL.py
Last active June 14, 2018 11:39
Snippet for toyCarIRL, blog usage, not executable
# IRL algorithm developed for the toy car obstacle avoidance problem, used for testing.
import numpy as np
import logging
import scipy
from playing import play #get the RL Test agent, gives out feature expectations after 2000 frames
from nn import neural_net #construct the nn and send to playing
from cvxopt import matrix
from cvxopt import solvers #convex optimization library
from flat_game import carmunk # get the environment
from learning import IRL_helper # get the Reinforcement learner
@jangirrishabh
jangirrishabh / toyCarIRL6.py
Created June 14, 2018 11:41
Snippet for toyCarIRL, blog usage, not executable
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^ the random policy's feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^ feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^ feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
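    # Hypothetical continuation (the irlAgent class, its constructor arguments and
    # optimalWeightFinder() are illustrative names, not taken from the gist): pick one
    # expert behavior and run the IRL loop against it.
    epsilon = 0.1      # stop once the expert/learner feature-expectation distance t falls below this
    irlearner = irlAgent(randomPolicyFE, expertPolicyRedFE, epsilon)
    print(irlearner.optimalWeightFinder())   # alternates optimization() and policyListUpdater() until convergence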
@jangirrishabh
jangirrishabh / toyCarIRL5.py
Created June 14, 2018 11:41
Snippet for toyCarIRL, blog usage, not executable
def optimization(self):  # implement the convex optimization, posed as an SVM problem
    m = len(self.expertPolicy)
    P = matrix(2.0 * np.eye(m), tc='d')  # min ||w||
    q = matrix(np.zeros(m), tc='d')
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
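    # Sketch of the remaining steps (assumed, not the gist's exact code). The expert is
    # labeled +1 and every learned policy -1, and each must clear a margin of 1:
    # w . expertFE >= 1 and w . policyFE_j <= -1. In cvxopt's Gx <= h form that is
    # G = -(label_i * FE_i) row-wise and h = -1 for every row.
    labels = np.array([1.0] + [-1.0] * (len(policyList) - 1))
    G = matrix(-(labels[:, None] * np.asarray(policyMat)), tc='d')
    h = matrix(-np.array(h_list, dtype='d'), tc='d')
    sol = solvers.qp(P, q, G, h)                 # minimize ||w||^2 subject to the margin constraints
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)     # normalized reward weights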
@jangirrishabh
jangirrishabh / toyCarIRL4.py
Created June 14, 2018 11:42
Snippet for toyCarIRL, blog usage, not executable
def policyListUpdater(self, W, i):  # add the new policy's feature expectations to the list, keyed by its distance from the expert
    tempFE = self.getRLAgentFE(W, i)  # get feature expectations of a new policy trained for the given weight vector W
    hyperDistance = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(tempFE)))  # hyperdistance = t
    self.policiesFE[hyperDistance] = tempFE
    return hyperDistance  # t = (weights.transpose) * (expert - newPolicy)
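policyListUpdater returns the margin t for a single iteration; the outer loop that ties these methods together is not shown in the snippets. A hedged sketch of such a loop, with an illustrative method name and an assumed self.epsilon threshold:

def optimalWeightFinder(self):  # illustrative sketch, not the gist's code
    i = 1
    while True:
        W = self.optimization()                  # new reward weights from the max-margin QP
        currentT = self.policyListUpdater(W, i)  # train a policy under W and measure its distance from the expert
        if currentT <= self.epsilon:             # assumed stopping threshold
            break
        i += 1
    return W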
@jangirrishabh
jangirrishabh / toyCarIRL4.py
Created June 14, 2018 11:42
Snippet for toyCarIRL, blog usage, not executable
def getRLAgentFE(self, W, i):  # get the feature expectations of a new policy using the RL agent
    IRL_helper(W, self.behavior, self.num_frames, i)  # train the agent and save the model in the file used below
    saved_model = 'saved-models_' + self.behavior + str(i) + '/164-150-100-50000-' + str(self.num_frames) + '.h5'  # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    return play(model, W)  # return feature expectations by executing the learned policy
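play() is described as returning feature expectations after 2000 frames. Conceptually these are the discounted sum of the state feature vectors visited while executing the learned policy; the loop below is a sketch of that idea only (GAMMA, env_reset, env_step and the feature vector layout are assumptions, not the blog's playing.py):

GAMMA = 0.9
featureExpectations = np.zeros(8)                             # one entry per state feature
state = env_reset()                                           # hypothetical environment reset
for t in range(2000):
    action = np.argmax(model.predict(state, batch_size=1))    # greedy action from the trained net
    reward, state, features = env_step(action)                # hypothetical step returning the feature vector
    featureExpectations += (GAMMA ** t) * np.array(features)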