Rishabh Jangir jangirrishabh

@jangirrishabh
jangirrishabh / toyCarIRL5.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
def optimization(self):  # implement the convex optimization, posed as an SVM problem
    m = len(self.expertPolicy)
    P = matrix(2.0 * np.eye(m), tc='d')  # min ||w||
    q = matrix(np.zeros(m), tc='d')
    policyList = [self.expertPolicy]
    h_list = [1]
    for i in self.policiesFE.keys():
        policyList.append(self.policiesFE[i])
        h_list.append(1)
    policyMat = np.matrix(policyList)
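The snippet cuts off before the solver call. A plausible continuation, sketched from the standard max-margin formulation (min ||w||^2 subject to w.muE >= 1 and w.mu_i <= -1 for each earlier policy); this completion is an assumption, not the blog's verbatim code:
    # In cvxopt's form G*w <= h, negate the expert row so that
    # -muE.w <= -1 (i.e. muE.w >= 1) and mu_i.w <= -1 for every candidate policy.
    policyMat[0] = -1 * policyMat[0]
    G = matrix(policyMat, tc='d')
    h = matrix(-np.array(h_list, dtype='d'))
    sol = solvers.qp(P, q, G, h)                  # solve the QP
    weights = np.squeeze(np.asarray(sol['x']))
    return weights / np.linalg.norm(weights)      # normalized reward weights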
@jangirrishabh
jangirrishabh / toyCarIRL6.py
Created Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    randomPolicyFE = [7.74363107, 4.83296402, 6.1289194, 0.39292849, 2.0488831, 0.65611318, 6.90207523, 2.46475348]
    # ^ the random policy's feature expectations
    expertPolicyYellowFE = [7.5366e+00, 4.6350e+00, 7.4421e+00, 3.1817e-01, 8.3398e+00, 1.3710e-08, 1.3419e+00, 0.0000e+00]
    # ^ feature expectations for the "follow yellow obstacles" behavior
    expertPolicyRedFE = [7.9100e+00, 5.3745e-01, 5.2363e+00, 2.8652e+00, 3.3120e+00, 3.6478e-06, 3.82276074e+00, 1.0219e-17]
    # ^ feature expectations for the "follow red obstacles" behavior
    expertPolicyBrownFE = [5.2210e+00, 5.6980e+00, 7.7984e+00, 4.8440e-01, 2.0885e-04, 9.2215e+00, 2.9386e-01, 4.8498e-17]
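Each of these eight-dimensional vectors is a discounted feature expectation, mu(pi) = sum_t gamma^t * phi(s_t), estimated by running the corresponding policy for 2000 frames. A minimal sketch of how such a vector can be accumulated; the frame_step interface and the discount value here are assumptions for illustration:
import numpy as np

GAMMA = 0.9  # assumed discount factor

def estimate_feature_expectations(env, choose_action, num_frames=2000):
    """Accumulate gamma^t * phi(s_t) over one rollout of the policy."""
    feature_expectations = np.zeros(8)        # 8 features in the toy car task
    _, state, features = env.frame_step(2)    # hypothetical carmunk-style first step
    for t in range(num_frames):
        action = choose_action(state)
        _, state, features = env.frame_step(action)
        feature_expectations += (GAMMA ** t) * np.asarray(features)
    return feature_expectations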
@jangirrishabh
jangirrishabh / toyCarIRL.py
Last active Jun 14, 2018
Snippet for toyCarIRL, blog usage, not executable
# IRL algorithm developed for the toy car obstacle avoidance problem, for testing.
import numpy as np
import logging
import scipy
from playing import play          # get the RL test agent; gives out feature expectations after 2000 frames
from nn import neural_net         # construct the NN and pass it to playing
from cvxopt import matrix
from cvxopt import solvers        # convex optimization library
from flat_game import carmunk     # get the environment
from learning import IRL_helper   # get the reinforcement learner
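These imports feed the projection variant of apprenticeship learning (Abbeel & Ng, 2004): alternate between solving the QP for reward weights and training an RL agent under those weights until the expert's feature expectations are nearly matched. A rough sketch of that outer loop; epsilon and the helper names are assumptions, not the repo's exact API:
def optimal_weight_finder(self, epsilon=0.1):
    # Hypothetical outer loop of the IRL trainer.
    while True:
        W = self.optimization()           # weights from the SVM-style QP above
        mu = self.get_agent_fe(W)         # train an RL agent on reward W.phi, measure its FE
        t = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(mu)))
        self.policiesFE[t] = mu           # this policy becomes a new QP constraint
        if t < epsilon:                   # margin small enough: weights explain the expert
            return W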
@jangirrishabh
jangirrishabh / ddpg2.py
Last active Jul 13, 2018
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.lambda1 = 0.001   # weight on the policy-gradient (Q) term of the actor loss
self.lambda2 = 0.0078  # weight on the behavior-cloning term

def _create_network(self, reuse=False):
    mask = np.concatenate((np.zeros(self.batch_size - self.demo_batch_size), np.ones(self.demo_batch_size)), axis=0)  # 0 = rollout slot, 1 = demo slot
    target_Q_pi_tf = self.target.Q_pi_tf
    clip_range = (-self.clip_return, 0. if self.clip_pos_returns else np.inf)
    target_tf = tf.clip_by_value(batch_tf['r'] + self.gamma * target_Q_pi_tf, *clip_range)  # y = r + gamma * Q(pi)
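lambda1 and lambda2 weight the two terms of the actor loss, and the mask confines behavior cloning to the demonstration slots of the batch. A minimal sketch of how these pieces combine, in the spirit of Nair et al. (2018); the tensor names mirror the snippet, but this assembly is an assumption:
    # Actor loss = policy-gradient term + weighted behavior-cloning term.
    bool_mask = mask.astype(bool)                      # True on the demo slots
    pi_tf = self.main.pi_tf                            # actions proposed by the actor
    pg_loss_tf = -tf.reduce_mean(self.main.Q_pi_tf)    # standard DDPG actor objective
    bc_loss_tf = tf.reduce_sum(tf.square(
        tf.boolean_mask(pi_tf, bool_mask) - tf.boolean_mask(batch_tf['u'], bool_mask)))
    self.pi_loss_tf = self.lambda1 * pg_loss_tf + self.lambda2 * bc_loss_tf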
@jangirrishabh
jangirrishabh / ddpg1.py
Last active Jul 13, 2018
Snippet for using demonstrations in ddpg.py agent, blog usage, not executable
self.demo_batch_size = 128

def initDemoBuffer(self, demoDataFile, update_stats=True):
    # To initialize the demo buffer with the recorded demonstration data; the demo data is also normalized.

def sample_batch(self):
    if self.bc_loss:
        transitions = self.buffer.sample(self.batch_size - self.demo_batch_size)
        global demoBuffer
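sample_batch draws batch_size - demo_batch_size transitions from the regular replay buffer, then tops the batch up from the demo buffer so every training batch carries a fixed demonstration fraction in its last slots (matching the mask in ddpg2.py). A plausible continuation, assuming the buffer returns a dict of per-key arrays:
        # Hypothetical continuation: append the demo transitions to each field.
        transitions_demo = demoBuffer.sample(self.demo_batch_size)
        for key in transitions.keys():
            transitions[key] = np.concatenate((transitions[key], transitions_demo[key]), axis=0)
        return transitions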
jangirrishabh / poleCart_RL.py
""" Quick script for an "Episodic Controller" Agent, i.e. nearest neighbor """
import logging
import os
#import tempfile
import numpy as np
import gym
class EpisodicAgent(object):
def __init__(self, action_space):
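An episodic controller acts by nearest neighbor: remember (observation, action) pairs from past episodes and copy whatever the closest stored observation did. A compact sketch of that decision rule; the memory layout and epsilon handling are assumptions:
import numpy as np

def nn_act(memory_obs, memory_actions, obs, action_space, epsilon=0.1):
    """memory_obs: (N, obs_dim) stored observations; memory_actions: the action taken at each."""
    if len(memory_obs) == 0 or np.random.rand() < epsilon:
        return action_space.sample()                  # explore (or cold-start the memory)
    dists = np.linalg.norm(np.asarray(memory_obs) - obs, axis=1)
    return memory_actions[int(np.argmin(dists))]      # copy the nearest neighbor's action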
jangirrishabh / inverseCart.py
import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
# %matplotlib inline  (Jupyter magic; keep commented out when running as a plain .py script)
from poleCart_RL import EpisodicAgent  # get the RL agent
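LA supplies the vector norms used to compare feature expectations; the stopping test of the cartpole IRL loop reduces to an L2 distance, e.g. (variable names illustrative):
# Illustrative convergence test: stop once the agent's feature
# expectations are within epsilon of the expert's.
epsilon = 0.1
if LA.norm(np.asarray(expertFE) - np.asarray(agentFE)) <= epsilon:
    print('Feature expectations matched; the reward weights have converged.')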
jangirrishabh / poleCart_manual.py
# import gym
# env = gym.make('MountainCar-v0')
# env = gym.make('CartPole-v0')
# env = gym.make('MsPacman-v0')
import gym     # get the environment from OpenAI Gym
import curses  # for keypress input
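A minimal sketch of the manual-control loop these imports set up: read a keypress with curses and map it to a CartPole action. The key bindings and loop structure are assumptions; gym's classic 4-tuple step API is used, matching the era of the snippet:
def manual_control(stdscr):
    env = gym.make('CartPole-v0')
    env.reset()
    stdscr.nodelay(False)              # block until a key is pressed
    done = False
    while not done:
        env.render()
        key = stdscr.getch()
        if key == curses.KEY_LEFT:
            action = 0                 # push the cart left
        elif key == curses.KEY_RIGHT:
            action = 1                 # push the cart right
        else:
            break                      # any other key quits
        _, _, done, _ = env.step(action)
    env.close()

curses.wrapper(manual_control)         # curses sets up and tears down the terminal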