import gym
import numpy as np
import cv2, math
import logging
import os
import scipy
from numpy import linalg as LA
from matplotlib import pyplot as plt
#%matplotlib inline # IPython/Jupyter magic; leave commented out when running as a plain script
from poleCart_RL import EpisodicAgent #get the RL agent
from poleCart_manual import expertFeatures
from cvxopt import matrix
from cvxopt import solvers
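
# Inverse reinforcement learning on CartPole (in the spirit of Abbeel & Ng's apprenticeship-learning
# projection method): starting from a random policy's feature expectations, repeatedly solve a
# max-margin QP for reward weights W, train an RL agent under W, and record the learned policy's
# feature expectations, until some learned policy comes within epsilon of the expert's.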
class irlAgent:
    def __init__(self, gymEnv, rlEpisodes, rlMaxSteps): # constructor
        self.env = gymEnv
        self.episodesRL = rlEpisodes
        self.maxStepsRL = rlMaxSteps
        self.randomPolicy = [6.33159868, 22.18457058, 24.07697606, 64.68426447, 15.92186349] # feature expectations of a random policy
        self.expertPolicy = [1.40327044, 12.06541251, 1.39011785, 15.70455323, 19.99994606] # human-generated expert: keep the pole straight
        #self.expertPolicy = [24.90898925, 66.21544503, 28.48223649, 80.00899435, 19.67509372] # human-generated expert: maximize displacement
        #self.expertPolicy = [1.38076217, 3.6306461, 0.79024451, 3.27657669, 20.99918233] # machine-generated expert: keep the pole straight
        self.epsilon = 1.0
        # dict mapping distance-to-expert -> feature expectations, seeded with the random policy
        self.policiesFE = {np.linalg.norm(np.asarray(self.expertPolicy) - np.asarray(self.randomPolicy)): self.randomPolicy}

    def getRLAgentFE(self, W): # get the feature expectations of a new policy using the RL agent
        agent = EpisodicAgent(self.env.action_space)
        return agent.reinforce(self.env, W, self.episodesRL, self.maxStepsRL) # return feature expectations

    def policyListUpdater(self, W): # update the policiesFE dict and its distance keys when a new weight vector (policy) arrives
        for i in list(self.policiesFE.keys()): # copy the keys so entries can be re-keyed while iterating
            temp = np.abs(np.dot(W, (np.asarray(self.expertPolicy) - np.asarray(self.policiesFE[i]))))
            if temp != i:
                self.policiesFE[temp] = self.policiesFE[i]
                del self.policiesFE[i]
        tempFE = self.getRLAgentFE(W)
        self.policiesFE[np.abs(np.dot(W, (np.asarray(self.expertPolicy) - np.asarray(tempFE))))] = tempFE
        #self.policiesFE[np.linalg.norm(np.asarray(self.expertPolicy) - np.asarray(tempFE))] = tempFE
        #self.policiesFE[np.dot(W, (np.asarray(self.expertPolicy) - np.asarray(tempFE)))] = tempFE

    def optimalWeightFinder(self):
        t_prev = 0
        while True:
            # optimize against the policy whose feature expectations are currently closest to the expert's
            W = self.optimization(np.matrix(self.expertPolicy) - np.matrix(self.policiesFE[min(self.policiesFE)]))
            print " Minimum distance : ", min(self.policiesFE)
            print " Closest feature expectations : ", np.matrix(self.policiesFE[min(self.policiesFE)])
            #t = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(self.policiesFE[min(self.policiesFE)])))
            t = np.dot(W, np.asarray(self.expertPolicy) - np.asarray(self.policiesFE[min(self.policiesFE)]))
            #print np.squeeze(np.asarray(np.matrix(self.expertPolicy) - np.matrix(self.policiesFE[min(self.policiesFE)])))
            if np.abs(t) <= 1.0 + self.epsilon: # stop once the margin falls below the threshold
                break
            #if np.abs(t - t_prev) < self.epsilon:
                #break
            self.policyListUpdater(W)
            t_prev = t
            print " the t value :: ", np.abs(t)
            print " the keys :: ", self.policiesFE.keys()
            print " weights ", W
        return W

    def optimization(self, difference): # max-margin QP: minimize ||w||^2 subject to w . (expertFE - policyFE) >= 1
        P = matrix(2.0 * np.eye(5), tc='d')
        q = matrix(np.zeros(5), tc='d')
        #G = matrix((np.matrix(self.expertPolicy) - np.matrix(self.randomPolicy)), tc='d')
        G = matrix(-difference, tc='d')
        h = matrix(np.array([-1]), tc='d')
        sol = solvers.qp(P, q, G, h)
        #print sol['status']
        #return sol['x']
        weights = np.squeeze(np.asarray(sol['x']))
        norm = np.linalg.norm(weights)
        weights = weights / norm # normalize the weight vector
        return weights
if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    rlEpisodes = 100
    rlMaxSteps = 250
    #W = [-0.9, -0.9, -0.9, -0.9, 1]
    env = gym.make('CartPole-v0')
    irlearner = irlAgent(env, rlEpisodes, rlMaxSteps)
    #print irlearner.policiesFE
    #irlearner.policyListUpdater(W)
    #print irlearner.rlAgentFeatureExpecs(W)
    #print irlearner.expertFeatureExpecs()
    print irlearner.optimalWeightFinder()
    #print irlearner.optimization(20)
    #np.squeeze(np.asarray(M))