Skip to content

Instantly share code, notes, and snippets.

@cmansley
Created August 6, 2011 00:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cmansley/1128815 to your computer and use it in GitHub Desktop.
Save cmansley/1128815 to your computer and use it in GitHub Desktop.
A simple verification of an optimization from ICML 2005 by Strehl and Littman
import math
import random
from cvxopt import matrix
from cvxopt import solvers
solvers.options['show_progress'] = False
def dot(T,V):
return sum([T[i]*V[i] for i in range(len(V))])
def constrain_max(T, V, eps):
if 1-T[V.index(max(V))] <= eps/2:
T_temp = [0 for i in T]
T_temp[V.index(max(V))] = 1
return (max(V), T_temp)
else:
T_temp = list(T)
s_max = V.index(max(V))
weight = eps/2.0
T_temp[s_max] += weight
v_sort = sorted([(v,i) for i,v in enumerate(V)])
for v,i in v_sort:
weight -= T_temp[i]
T_temp[i] = 0.0
if weight < 0:
T_temp[i] += math.fabs(weight)
break
return (dot(T_temp,V), T_temp)
def l1_norm(t1, t2):
return sum([math.fabs(t1[i] - t2[i]) for i in range(len(t1))])
def lp_constrain_max(T,V,eps):
# Original Linear Program
# max V'x
# s.t.
# ||x-T||_1 <= eps
# sum(x) == 1
# all x_i > 0
# Cannonical Form
# max V'x
# s.t.
# Ix-y <= T
# -Ix+y <= -T
# e'y <= eps
# -Ix <= 0
#
# e'x == 1
# where e is the vector of all ones and y was introduced to
# simulate the L1 norm converting ||x-T||_1 <= eps --> x-T <= y
# and x-T >= -y and e'y <= eps
# construct b
t1 = matrix(T)
t2 = -1*matrix(T)
t3 = matrix(0.0, (len(T),1))
b = matrix([t1,t2,t3,eps])
# construct c
t4 = -1*matrix(V,(len(V),1),'d')
t5 = matrix(0.0,(len(T),1),'d')
c = matrix([t4, t5])
# construct A
# [I -I; -I -I; -I 0; 0 1]
A = matrix(0.0, (3*len(T)+1, len(T)+len(T)))
for i in range(len(T)):
# first row block
A[i,i] = 1.0
A[i,i+len(T)] = -1.0
# second row block
A[i+len(T),i] = -1.0
A[i+len(T),i+len(T)] = -1.0
# third row block
A[i+2*len(T),i] = -1.0
# last row
A[3*len(T), i+len(T)] = 1.0
# equalities
t6 = matrix(1.0, (len(T),1))
t7 = matrix(0.0, (len(T),1))
t8 = matrix([t6, t7])
G = t8.T
h = matrix([1.0])
sol=solvers.lp(c,A,b,G,h)
T_temp = list(sol['x'][0:len(T), 0])
return (dot(T_temp,V), T_temp)
a = [1, 0, 0]
b = [0, 1, 0]
if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99:
print "Error: L1 norm failure"
a = [0, 0, 1]
b = [0, 1, 0]
if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99:
print "Error: L1 norm failure"
functions = [lp_constrain_max, constrain_max]
for x in range(1000):
# create random transitions and values
V = [random.uniform(-10,10) for i in range(10)]
temp = [random.random() for i in range(10)]
scale = sum(temp)
T = [t/float(scale) for t in temp]
if sum(T) < 0.99 or sum(T) > 1.01:
print "Error: Transition does not sum to one", sum(T)
e = random.uniform(0, 5)
values = []
for f in functions:
v, t = f(T,V,e)
values.append(v)
if e - l1_norm(t, T) < -0.000001:
print "Error: L1 norm is greater than epsilon : ", l1_norm(t, T), ">", e, " in ",f
print "Old : ",T
print "New : ",t
if sum(t) < 0.99 or sum(t) > 1.01:
print "Error: New Transition does not sum to one ", t, " in ",f
if v < dot(T,V):
print "Error: New Value has not increased", dot(T,V), " > ", v, " in ",f
if math.fabs(values[0] - values[1]) > 0.1:
print "Algorithm mismatch"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment