A simple verification of an optimization from ICML 2005 by Strehl and Littman
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random | |
from cvxopt import matrix | |
from cvxopt import solvers | |
solvers.options['show_progress'] = False | |
def dot(T,V): | |
return sum([T[i]*V[i] for i in range(len(V))]) | |
def constrain_max(T, V, eps): | |
if 1-T[V.index(max(V))] <= eps/2: | |
T_temp = [0 for i in T] | |
T_temp[V.index(max(V))] = 1 | |
return (max(V), T_temp) | |
else: | |
T_temp = list(T) | |
s_max = V.index(max(V)) | |
weight = eps/2.0 | |
T_temp[s_max] += weight | |
v_sort = sorted([(v,i) for i,v in enumerate(V)]) | |
for v,i in v_sort: | |
weight -= T_temp[i] | |
T_temp[i] = 0.0 | |
if weight < 0: | |
T_temp[i] += math.fabs(weight) | |
break | |
return (dot(T_temp,V), T_temp) | |
def l1_norm(t1, t2): | |
return sum([math.fabs(t1[i] - t2[i]) for i in range(len(t1))]) | |
def lp_constrain_max(T,V,eps): | |
# Original Linear Program | |
# max V'x | |
# s.t. | |
# ||x-T||_1 <= eps | |
# sum(x) == 1 | |
# all x_i > 0 | |
# Cannonical Form | |
# max V'x | |
# s.t. | |
# Ix-y <= T | |
# -Ix+y <= -T | |
# e'y <= eps | |
# -Ix <= 0 | |
# | |
# e'x == 1 | |
# where e is the vector of all ones and y was introduced to | |
# simulate the L1 norm converting ||x-T||_1 <= eps --> x-T <= y | |
# and x-T >= -y and e'y <= eps | |
# construct b | |
t1 = matrix(T) | |
t2 = -1*matrix(T) | |
t3 = matrix(0.0, (len(T),1)) | |
b = matrix([t1,t2,t3,eps]) | |
# construct c | |
t4 = -1*matrix(V,(len(V),1),'d') | |
t5 = matrix(0.0,(len(T),1),'d') | |
c = matrix([t4, t5]) | |
# construct A | |
# [I -I; -I -I; -I 0; 0 1] | |
A = matrix(0.0, (3*len(T)+1, len(T)+len(T))) | |
for i in range(len(T)): | |
# first row block | |
A[i,i] = 1.0 | |
A[i,i+len(T)] = -1.0 | |
# second row block | |
A[i+len(T),i] = -1.0 | |
A[i+len(T),i+len(T)] = -1.0 | |
# third row block | |
A[i+2*len(T),i] = -1.0 | |
# last row | |
A[3*len(T), i+len(T)] = 1.0 | |
# equalities | |
t6 = matrix(1.0, (len(T),1)) | |
t7 = matrix(0.0, (len(T),1)) | |
t8 = matrix([t6, t7]) | |
G = t8.T | |
h = matrix([1.0]) | |
sol=solvers.lp(c,A,b,G,h) | |
T_temp = list(sol['x'][0:len(T), 0]) | |
return (dot(T_temp,V), T_temp) | |
a = [1, 0, 0] | |
b = [0, 1, 0] | |
if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99: | |
print "Error: L1 norm failure" | |
a = [0, 0, 1] | |
b = [0, 1, 0] | |
if l1_norm(a,b) > 2.01 and l1_norm(a,b) < 1.99: | |
print "Error: L1 norm failure" | |
functions = [lp_constrain_max, constrain_max] | |
for x in range(1000): | |
# create random transitions and values | |
V = [random.uniform(-10,10) for i in range(10)] | |
temp = [random.random() for i in range(10)] | |
scale = sum(temp) | |
T = [t/float(scale) for t in temp] | |
if sum(T) < 0.99 or sum(T) > 1.01: | |
print "Error: Transition does not sum to one", sum(T) | |
e = random.uniform(0, 5) | |
values = [] | |
for f in functions: | |
v, t = f(T,V,e) | |
values.append(v) | |
if e - l1_norm(t, T) < -0.000001: | |
print "Error: L1 norm is greater than epsilon : ", l1_norm(t, T), ">", e, " in ",f | |
print "Old : ",T | |
print "New : ",t | |
if sum(t) < 0.99 or sum(t) > 1.01: | |
print "Error: New Transition does not sum to one ", t, " in ",f | |
if v < dot(T,V): | |
print "Error: New Value has not increased", dot(T,V), " > ", v, " in ",f | |
if math.fabs(values[0] - values[1]) > 0.1: | |
print "Algorithm mismatch" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment