jangirrishabh/toyCarIRL5.py

## toyCarIRL5.py
    def optimization(self):
        """Solve the max-margin (SVM-style) quadratic program for the weights.

        With ``P = 2I`` and ``q = 0`` the objective handed to ``solvers.qp``
        is ``||w||^2``. The inequality rows ``G w <= h`` encode:

        * ``w . expertPolicy >= 1`` (row 0, stored negated), and
        * ``w . fe <= -1`` for every value ``fe`` in ``self.policiesFE``.

        NOTE(review): this assumes ``matrix`` / ``solvers`` are CVXOPT's and
        that every feature vector has ``len(self.expertPolicy)`` entries --
        confirm against the file's imports and callers.

        Returns:
            The solver's ``x`` as a squeezed NumPy array scaled to unit
            Euclidean norm. A zero-length solution is returned unscaled
            instead of being turned into NaNs by a division by zero.
        """
        m = len(self.expertPolicy)
        # qp minimises (1/2) x'Px + q'x, so P = 2I and q = 0 give x'x.
        P = matrix(2.0 * np.eye(m), tc='d')
        q = matrix(np.zeros(m), tc='d')

        # Row 0 is the expert; the remaining rows follow the dict's order.
        rows = [self.expertPolicy]
        rows.extend(self.policiesFE.values())

        # A plain 2-D array replaces np.matrix, which NumPy no longer recommends.
        policyMat = np.array(rows, dtype=float)
        # Negating the expert row turns "w.expert >= 1" into the "<=" form qp expects.
        policyMat[0] = -policyMat[0]

        G = matrix(policyMat, tc='d')
        h = matrix(-np.ones(len(rows)), tc='d')  # every margin is 1
        sol = solvers.qp(P, q, G, h)

        weights = np.squeeze(np.asarray(sol['x']))
        norm = np.linalg.norm(weights)
        if norm == 0:
            # No direction to normalise; avoid returning NaNs.
            return weights
        return weights / norm  # return the normalized weights
	def optimization(self):
		"""Solve the max-margin (SVM-style) quadratic program for the weights.

		With ``P = 2I`` and ``q = 0`` the objective handed to ``solvers.qp``
		is ``||w||^2``. The inequality rows ``G w <= h`` encode:

		* ``w . expertPolicy >= 1`` (row 0, stored negated), and
		* ``w . fe <= -1`` for every value ``fe`` in ``self.policiesFE``.

		NOTE(review): this assumes ``matrix`` / ``solvers`` are CVXOPT's and
		that every feature vector has ``len(self.expertPolicy)`` entries --
		confirm against the file's imports and callers.

		Returns:
			The solver's ``x`` as a squeezed NumPy array scaled to unit
			Euclidean norm. A zero-length solution is returned unscaled
			instead of being turned into NaNs by a division by zero.
		"""
		m = len(self.expertPolicy)
		# qp minimises (1/2) x'Px + q'x, so P = 2I and q = 0 give x'x.
		P = matrix(2.0 * np.eye(m), tc='d')
		q = matrix(np.zeros(m), tc='d')

		# Row 0 is the expert; the remaining rows follow the dict's order.
		rows = [self.expertPolicy]
		rows.extend(self.policiesFE.values())

		# A plain 2-D array replaces np.matrix, which NumPy no longer recommends.
		policyMat = np.array(rows, dtype=float)
		# Negating the expert row turns "w.expert >= 1" into the "<=" form qp expects.
		policyMat[0] = -policyMat[0]

		G = matrix(policyMat, tc='d')
		h = matrix(-np.ones(len(rows)), tc='d')  # every margin is 1
		sol = solvers.qp(P, q, G, h)

		weights = np.squeeze(np.asarray(sol['x']))
		norm = np.linalg.norm(weights)
		if norm == 0:
			# No direction to normalise; avoid returning NaNs.
			return weights
		return weights / norm  # return the normalized weights