Rishabh Jangir jangirrishabh

@jangirrishabh
jangirrishabh / toyCarIRL5.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def optimization(self):  # implement the convex optimization, posed as an SVM problem
    m = len(self.expertPolicy)
    P = matrix(2.0 * np.eye(m), tc='d')  # min ||w||
    q = matrix(np.zeros(m), tc='d')
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
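The snippet cuts off before the solver call. A plausible continuation, sketched from the standard max-margin formulation (min ||w||^2 subject to w.muE >= 1 and w.mu_i <= -1 for each earlier policy); this completion is an assumption, not the blog's verbatim code:
    # In cvxopt's form G*w <= h, negate the expert row so that
    # -muE.w <= -1 (i.e. muE.w >= 1) and mu_i.w <= -1 for every candidate policy.
    policyMat[0] = -1 * policyMat[0]
    G = matrix(policyMat, tc='d')
    h = matrix(-np.array(h_list, dtype='d'))
    sol = solvers.qp(P, q, G, h)                  # solve the QP
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)      # normalized reward weights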
@jangirrishabh
jangirrishabh / toyCarIRL6.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^ the random policy's feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^ feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^ feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
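Each of these eight-dimensional vectors is a discounted feature expectation, mu(pi) = sum_t gamma^t * phi(s_t), estimated by running the corresponding policy for 2000 frames. A minimal sketch of how such a vector can be accumulated; the frame_step interface and the discount value here are assumptions for illustration:
import numpy as np

GAMMA = 0.9  # assumed discount factor

def estimate_feature_expectations(env, choose_action, num_frames=2000):
    """Accumulate gamma^t * phi(s_t) over one rollout of the policy."""
    feature_expectations = np.zeros(8)        # 8 features in the toy car task
    _, state, features = env.frame_step(2)    # hypothetical carmunk-style first step
    for t in range(num_frames):
        action = choose_action(state)
        _, state, features = env.frame_step(action)
        feature_expectations += (GAMMA ** t) * np.asarray(features)
    return feature_expectations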
@jangirrishabh
jangirrishabh / toyCarIRL.py
Last active Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
# IRL algorithm developed for the toy car obstacle avoidance problem, for testing.
import numpy as np
import logging
import scipy
from playing import play          # get the RL test agent; gives out feature expectations after 2000 frames
from nn import neural_net         # construct the NN and pass it to playing
from cvxopt import matrix
from cvxopt import solvers        # convex optimization library
from flat_game import carmunk     # get the environment
from learning import IRL_helper   # get the reinforcement learner
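These imports feed the projection variant of apprenticeship learning (Abbeel & Ng, 2004): alternate between solving the QP for reward weights and training an RL agent under those weights until the expert's feature expectations are nearly matched. A rough sketch of that outer loop; epsilon and the helper names are assumptions, not the repo's exact API:
def optimal_weight_finder(self, epsilon=0.1):
    # Hypothetical outer loop of the IRL trainer.
    while True:
        W = self.optimization()           # weights from the SVM-style QP above
        mu = self.get_agent_fe(W)         # train an RL agent on reward W.phi, measure its FE
        t = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(mu)))
        self.policiesFE[t] = mu           # this policy becomes a new QP constraint
        if t < epsilon:                   # margin small enough: weights explain the expert
            return W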
@jangirrishabh
jangirrishabh / ddpg2.py
Last active Jul 13, 2018
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.lambda1 = 0.001   # weight on the policy-gradient (Q) term of the actor loss
self.lambda2 = 0.0078  # weight on the behavior-cloning term

def _create_network(self, reuse=False):
    mask = np.concatenate((np.zeros(self.batch_size - self.demo_batch_size), np.ones(self.demo_batch_size)), axis=0)  # 0 = rollout slot, 1 = demo slot
    target_Q_pi_tf = self.target.Q_pi_tf
    clip_range = (-self.clip_return, 0. if self.clip_pos_returns else np.inf)
    target_tf = tf.clip_by_value(batch_tf['r'] + self.gamma * target_Q_pi_tf, *clip_range)  # y = r + gamma * Q(pi)
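lambda1 and lambda2 weight the two terms of the actor loss, and the mask confines behavior cloning to the demonstration slots of the batch. A minimal sketch of how these pieces combine, in the spirit of Nair et al. (2018); the tensor names mirror the snippet, but this assembly is an assumption:
    # Actor loss = policy-gradient term + weighted behavior-cloning term.
    bool_mask = mask.astype(bool)                      # True on the demo slots
    pi_tf = self.main.pi_tf                            # actions proposed by the actor
    pg_loss_tf = -tf.reduce_mean(self.main.Q_pi_tf)    # standard DDPG actor objective
    bc_loss_tf = tf.reduce_sum(tf.square(
        tf.boolean_mask(pi_tf, bool_mask) - tf.boolean_mask(batch_tf['u'], bool_mask)))
    self.pi_loss_tf = self.lambda1 * pg_loss_tf + self.lambda2 * bc_loss_tf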
@jangirrishabh
jangirrishabh / ddpg1.py
Last active Jul 13, 2018
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.demo_batch_size = 128

def initDemoBuffer(self, demoDataFile, update_stats=True):
    # To initialize the demo buffer with the recorded demonstration data; the demo data is also normalized.

def sample_batch(self):
    if self.bc_loss:
        transitions = self.buffer.sample(self.batch_size - self.demo_batch_size)
        global demoBuffer
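sample_batch draws batch_size - demo_batch_size transitions from the regular replay buffer, then tops the batch up from the demo buffer so every training batch carries a fixed demonstration fraction in its last slots (matching the mask in ddpg2.py). A plausible continuation, assuming the buffer returns a dict of per-key arrays:
        # Hypothetical continuation: append the demo transitions to each field.
        transitions_demo = demoBuffer.sample(self.demo_batch_size)
        for key in transitions.keys():
            transitions[key] = np.concatenate((transitions[key], transitions_demo[key]), axis=0)
        return transitions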
jangirrishabh / poleCart_RL.py
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """
import logging
import os
#import tempfile
import numpy as np
import gym
class EpisodicAgent(object):
def __init__(self, action_space):
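An episodic controller acts by nearest neighbor: remember (observation, action) pairs from past episodes and copy whatever the closest stored observation did. A compact sketch of that decision rule; the memory layout and epsilon handling are assumptions:
import numpy as np

def nn_act(memory_obs, memory_actions, obs, action_space, epsilon=0.1):
    """memory_obs: (N, obs_dim) stored observations; memory_actions: the action taken at each."""
    if len(memory_obs) == 0 or np.random.rand() < epsilon:
        return action_space.sample()                  # explore (or cold-start the memory)
    dists = np.linalg.norm(np.asarray(memory_obs) - obs, axis=1)
    return memory_actions[int(np.argmin(dists))]      # copy the nearest neighbor's action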
jangirrishabh / inverseCart.py
import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
# %matplotlib inline  (Jupyter magic; keep commented out when running as a plain .py script)
from poleCart_RL import EpisodicAgent  # get the RL agent
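LA supplies the vector norms used to compare feature expectations; the stopping test of the cartpole IRL loop reduces to an L2 distance, e.g. (variable names illustrative):
# Illustrative convergence test: stop once the agent's feature
# expectations are within epsilon of the expert's.
epsilon = 0.1
if LA.norm(np.asarray(expertFE) - np.asarray(agentFE)) <= epsilon:
    print('Feature expectations matched; the reward weights have converged.')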
jangirrishabh / poleCart_manual.py
# import gym
# env = gym.make('MountainCar-v0')
# env = gym.make('CartPole-v0')
# env = gym.make('MsPacman-v0')
import gym     # get the environment from OpenAI Gym
import curses  # for keypress input
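A minimal sketch of the manual-control loop these imports set up: read a keypress with curses and map it to a CartPole action. The key bindings and loop structure are assumptions; gym's classic 4-tuple step API is used, matching the era of the snippet:
def manual_control(stdscr):
    env = gym.make('CartPole-v0')
    env.reset()
    stdscr.nodelay(False)              # block until a key is pressed
    done = False
    while not done:
        env.render()
        key = stdscr.getch()
        if key == curses.KEY_LEFT:
            action = 0                 # push the cart left
        elif key == curses.KEY_RIGHT:
            action = 1                 # push the cart right
        else:
            break                      # any other key quits
        _, _, done, _ = env.step(action)
    env.close()

curses.wrapper(manual_control)         # curses sets up and tears down the terminal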