Skip to content

Instantly share code, notes, and snippets.

# cmansley/mbie.py Created Aug 6, 2011

A simple verification of an optimization from ICML 2005 by Strehl and Littman
 import math import random from cvxopt import matrix from cvxopt import solvers solvers.options['show_progress'] = False def dot(T,V): return sum([T[i]*V[i] for i in range(len(V))]) def constrain_max(T, V, eps): if 1-T[V.index(max(V))] <= eps/2: T_temp = [0 for i in T] T_temp[V.index(max(V))] = 1 return (max(V), T_temp) else: T_temp = list(T) s_max = V.index(max(V)) weight = eps/2.0 T_temp[s_max] += weight v_sort = sorted([(v,i) for i,v in enumerate(V)]) for v,i in v_sort: weight -= T_temp[i] T_temp[i] = 0.0 if weight < 0: T_temp[i] += math.fabs(weight) break return (dot(T_temp,V), T_temp) def l1_norm(t1, t2): return sum([math.fabs(t1[i] - t2[i]) for i in range(len(t1))]) def lp_constrain_max(T,V,eps): # Original Linear Program # max V'x # s.t. # ||x-T||_1 <= eps # sum(x) == 1 # all x_i > 0 # Cannonical Form # max V'x # s.t. # Ix-y <= T # -Ix+y <= -T # e'y <= eps # -Ix <= 0 # # e'x == 1 # where e is the vector of all ones and y was introduced to # simulate the L1 norm converting ||x-T||_1 <= eps --> x-T <= y # and x-T >= -y and e'y <= eps # construct b t1 = matrix(T) t2 = -1*matrix(T) t3 = matrix(0.0, (len(T),1)) b = matrix([t1,t2,t3,eps]) # construct c t4 = -1*matrix(V,(len(V),1),'d') t5 = matrix(0.0,(len(T),1),'d') c = matrix([t4, t5]) # construct A # [I -I; -I -I; -I 0; 0 1] A = matrix(0.0, (3*len(T)+1, len(T)+len(T))) for i in range(len(T)): # first row block A[i,i] = 1.0 A[i,i+len(T)] = -1.0 # second row block A[i+len(T),i] = -1.0 A[i+len(T),i+len(T)] = -1.0 # third row block A[i+2*len(T),i] = -1.0 # last row A[3*len(T), i+len(T)] = 1.0 # equalities t6 = matrix(1.0, (len(T),1)) t7 = matrix(0.0, (len(T),1)) t8 = matrix([t6, t7]) G = t8.T h = matrix([1.0]) sol=solvers.lp(c,A,b,G,h) T_temp = list(sol['x'][0:len(T), 0]) return (dot(T_temp,V), T_temp) a = [1, 0, 0] b = [0, 1, 0] if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99: print "Error: L1 norm failure" a = [0, 0, 1] b = [0, 1, 0] if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99: print "Error: L1 norm failure" functions = [lp_constrain_max, constrain_max] for x in range(1000): # create random transitions and values V = [random.uniform(-10,10) for i in range(10)] temp = [random.random() for i in range(10)] scale = sum(temp) T = [t/float(scale) for t in temp] if sum(T) < 0.99 or sum(T) > 1.01: print "Error: Transition does not sum to one", sum(T) e = random.uniform(0, 5) values = [] for f in functions: v, t = f(T,V,e) values.append(v) if e - l1_norm(t, T) < -0.000001: print "Error: L1 norm is greater than epsilon : ", l1_norm(t, T), ">", e, " in ",f print "Old : ",T print "New : ",t if sum(t) < 0.99 or sum(t) > 1.01: print "Error: New Transition does not sum to one ", t, " in ",f if v < dot(T,V): print "Error: New Value has not increased", dot(T,V), " > ", v, " in ",f if math.fabs(values - values) > 0.1: print "Algorithm mismatch"
to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.