Rishabh Jangir (jangirrishabh)

jangirrishabh / poleCart_manual.py
# Alternative environments, kept commented out for quick switching:
#env = gym.make('MountainCar-v0')
#env = gym.make('CartPole-v0')
#env = gym.make('MsPacman-v0')
import gym # get the environment from OpenAI Gym
import curses # to read keypresses for manual control
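A minimal sketch of how such a manual-control loop could continue from these imports; the key bindings and curses setup below are illustrative assumptions, not the rest of the original gist.
# Sketch (assumption): drive CartPole-v0 from the keyboard with the arrow keys.
env = gym.make('CartPole-v0')
screen = curses.initscr() # take over the terminal so single keypresses can be read
curses.cbreak()
screen.keypad(True)
try:
    observation = env.reset()
    done = False
    while not done:
        env.render()
        key = screen.getch() # blocking read of one keypress
        action = 0 if key == curses.KEY_LEFT else 1 # push the cart left or right
        observation, reward, done, info = env.step(action)
finally:
    curses.endwin() # always give the terminal back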
jangirrishabh / inverseCart.py
import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
%matplotlib inline # IPython magic; only valid when run in a Jupyter/IPython session
from poleCart_RL import EpisodicAgent # get the RL agent
jangirrishabh / poleCart_RL.py
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """
import logging
import os
#import tempfile
import numpy as np
import gym
class EpisodicAgent(object):
    def __init__(self, action_space):
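The preview cuts off inside the constructor; the lines below are a hedged guess at the state a nearest-neighbor episodic controller would keep, not the original file.
        # Sketch (assumption): typical bookkeeping for a nearest-neighbor episodic controller.
        self.action_space = action_space
        self.memory = [] # (observation, action, discounted return) tuples from past episodes
        self.epsilon = 1.0 # exploration rate, annealed as the memory fills up
        self.gamma = 0.95 # discount factor used when scoring remembered actions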
@jangirrishabh
jangirrishabh / toyCarIRL.py
Last active Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
# IRL algorithm developed for the toy car obstacle avoidance problem, for testing.
import numpy as np
import logging
import scipy
from playing import play # get the RL test agent; gives out feature expectations after 2000 frames
from nn import neural_net # construct the neural net and pass it to playing
from cvxopt import matrix
from cvxopt import solvers # convex optimization library
from flat_game import carmunk # get the environment
from learning import IRL_helper # get the reinforcement learner
@jangirrishabh
jangirrishabh / toyCarIRL6.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^ the random policy feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^ feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^ feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
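Going by the irlAgent constructor shown further down, the main block presumably wires these lists into the agent along the following lines; the epsilon, state-count, frame-count, and behavior values here are illustrative assumptions.
    # Sketch (assumption): hand the feature expectations to the IRL agent and run it.
    epsilon = 0.1 # stop once the distance t drops below this
    num_states = 8 # one reward weight per feature-expectation component above
    num_frames = 100000 # frames of RL training per IRL iteration (illustrative)
    irlearner = irlAgent(randomPolicyFE, expertPolicyRedFE, epsilon, num_states, num_frames, 'red')
    print(irlearner.optimalWeightFinder())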
@jangirrishabh
jangirrishabh / toyCarIRL5.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def optimization(self): # implement the convex optimization, posed as an SVM-like problem
    m = len(self.expertPolicy)
    P = matrix(2.0*np.eye(m), tc='d') # quadratic objective: minimize ||w||^2
    q = matrix(np.zeros(m), tc='d') # no linear term
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
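The preview ends before the constraints are assembled; one way to finish the quadratic program with cvxopt, consistent with the variables built above but hedged as a reconstruction rather than the original file, follows.
    # Sketch (assumption): negate the expert row so the QP constraints G*w <= h read
    # w . expertFE >= 1 and w . policyFE_j <= -1 for every learned policy, then solve
    # min ||w||^2 and return the unit-norm weight vector (only the direction matters).
    policyMat[0] = -1 * policyMat[0]
    G = matrix(policyMat, tc='d')
    h = matrix(-np.array(h_list, dtype='d'), tc='d')
    sol = solvers.qp(P, q, G, h)
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)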
@jangirrishabh
jangirrishabh / toyCarIRL4.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def policyListUpdater(self, W, i): # add to the policiesFE list and compute the distance
    tempFE = self.getRLAgentFE(W, i) # get feature expectations of a new policy trained under the input weights
    hyperDistance = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(tempFE))) # hyperdistance = t
    self.policiesFE[hyperDistance] = tempFE
    return hyperDistance # t = (weights.transpose)*(expert - newPolicy)
@jangirrishabh
jangirrishabh / toyCarIRL4.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def getRLAgentFE(self, W, i): # get the feature expectations of a new policy using the RL agent
    IRL_helper(W, self.behavior, self.num_frames, i) # train the agent and save the model in a file used below
    saved_model = 'saved-models_'+self.behavior+str(i)+'/164-150-100-50000-'+str(self.num_frames)+'.h5' # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    return play(model, W) # return feature expectations by executing the learned policy
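For context, the comment at the top of toyCarIRL.py says play returns feature expectations after 2000 frames; the schematic below shows what that quantity is, the discounted sum of visited feature vectors, using hypothetical helper callables rather than the project's carmunk API.
# Schematic (assumption, not the project's play()): feature expectations are the
# discounted sum of the feature vectors seen while following a fixed policy.
def feature_expectations(get_features, choose_action, step, num_frames=2000, gamma=0.9):
    fe = np.zeros(8) # 8 features, matching the FE vectors in toyCarIRL6.py
    features = get_features() # feature vector of the start state
    for t in range(num_frames):
        fe += (gamma ** t) * np.asarray(features)
        features = step(choose_action(features)) # act, then observe the next feature vector
    return fe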
@jangirrishabh
jangirrishabh / toyCarIRL3.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
class irlAgent:
    def __init__(self, randomFE, expertFE, epsilon, num_states, num_frames, behavior):
        self.randomPolicy = randomFE
        self.expertPolicy = expertFE
        self.num_states = num_states
        self.num_frames = num_frames
        self.behavior = behavior
        self.epsilon = epsilon # termination threshold, e.g. stop when t < 0.1
        self.randomT = np.linalg.norm(np.asarray(self.expertPolicy) - np.asarray(self.randomPolicy)) # norm of the difference between expert and random feature expectations
        self.policiesFE = {self.randomT: self.randomPolicy} # store the policies and their respective t values in a dictionary
@jangirrishabh
jangirrishabh / toyCarIRL2.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def optimalWeightFinder(self):
    i = 1
    while True:
        W = self.optimization() # optimize to find new weights against the current list of policies
        print("weights ::", W)
        print("the distances ::", self.policiesFE.keys())
        self.currentT = self.policyListUpdater(W, i)
        print("Current distance (t) is:: ", self.currentT)
        if self.currentT <= self.epsilon: # terminate once the new policy is close enough to the expert
            break
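The preview stops at the break; presumably the loop then advances the iteration counter and, once the distance falls below epsilon, returns the final weights (a hedged reconstruction).
        i += 1 # otherwise train another policy under the new weights and repeat
    return W # reward weights under which the expert's behavior looks (near-)optimal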