Skip to content

Instantly share code, notes, and snippets.

@mattiasarro
Last active April 4, 2016 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattiasarro/6eb2b76d65a5768a026f0f3b196c230b to your computer and use it in GitHub Desktop.
Save mattiasarro/6eb2b76d65a5768a026f0f3b196c230b to your computer and use it in GitHub Desktop.
# Authors: Daniyal Shahrokhian, Mattias Arro
import numpy as np
from scipy.stats import bernoulli
# Generates n points (labels) according to the distribution of the exercise
def gen_points(x_values):
    """Draw one random label for every entry of ``x_values``.

    Each label comes from ``gen_point`` applied to the corresponding x; the
    labels are returned as a list in the same order as ``x_values``.
    """
    return [gen_point(x) for x in x_values]
def gen_point(x):
    """Sample a single 0/1 label for the point ``x``.

    The label is a Bernoulli draw whose success probability is
    (x + 2/3) / 2, i.e. x/2 + 1/3.
    """
    # Halving via * 0.5 is exact in floating point, same as dividing by 2.
    return bernoulli.rvs((x + 2.0 / 3.0) * 0.5)
# Returns the boundary (t) that minimizes the empirical risk. To do so,
# it calculates the risk of setting the boundary to each candidate in boundaries
# and, given the labels, keeps the candidate that gives the lowest loss over all
# x_values (the first such candidate on ties)
def gen_emprisk_minimizer(boundaries, x_values, labels):
    """Return the candidate boundary with the lowest empirical 0-1 risk.

    For a boundary ``t`` the classifier predicts 0 when ``x < t`` and 1
    otherwise. Its empirical risk is the number of (x, label) pairs it
    misclassifies.

    Args:
        boundaries: iterable of candidate thresholds, evaluated in order.
        x_values: the sample points.
        labels: 0/1 labels paired positionally with ``x_values`` (pairs are
            formed with ``zip``, so surplus entries of the longer sequence
            are ignored).

    Returns:
        The first boundary that attains the minimum risk, or 0 when
        ``boundaries`` is empty.
    """
    best_boundary = 0
    lowest_risk = float('inf')
    # Materialise the pairs once; they are re-scanned for every boundary.
    samples = list(zip(x_values, labels))
    for boundary in boundaries:
        # A conditional expression replaces tuple-indexing by a boolean:
        # with numpy inputs the comparison yields numpy.bool_, which NumPy
        # has deprecated as an index.
        risk = sum(
            1 for x, label in samples
            if label != (0 if x < boundary else 1)
        )
        # Strict '<' keeps the earliest boundary on ties.
        if risk < lowest_risk:
            best_boundary = boundary
            lowest_risk = risk
    return best_boundary
def excess_risk_from_formula(t):
    """Evaluate the closed-form expression (t - 1/3)**2 / 2 for boundary ``t``.

    ``t`` is converted with ``float`` first, so any float-convertible value
    is accepted.
    """
    offset = float(t) - 1.0 / 3.0
    return (offset ** 2) / 2
def excess_risk_discrete(x_values, t):
    """Fraction of ``x_values`` lying between boundary ``t`` and 1/3.

    Every point carries equal mass 1/len(x_values). A point is counted when
    it falls in [t, 1/3) (case t < 1/3) or in (1/3, t) (case t > 1/3); a
    point exactly at 1/3 is never counted.

    NOTE(review): every counted point contributes its full mass, with no
    weighting by its distance from 1/3, so this is not the same quantity as
    the closed form (t - 1/3)**2 / 2 -- confirm this is intended.

    Args:
        x_values: sized collection of float-convertible sample points.
        t: the decision boundary being evaluated.

    Returns:
        The counted fraction as a float; 0.0 when ``x_values`` is empty.
    """
    bayes_decision_boundary = 1.0 / 3.0
    n_points = len(x_values)
    if n_points == 0:
        # Nothing to count; also avoids dividing by zero below.
        return 0.0
    disagreements = 0
    for x in x_values:
        x = float(x)
        # At most one of the two ranges is non-empty, depending on which
        # side of 1/3 the boundary t lies; when t == 1/3 both are empty.
        if t <= x < bayes_decision_boundary or bayes_decision_boundary < x < t:
            disagreements += 1
    # Count first and divide once instead of summing 1/n repeatedly, which
    # would accumulate floating-point rounding error.
    return disagreements / float(n_points)
def _report(header, boundaries, x_values, labels):
    """Print the empirical-risk minimizer over ``boundaries`` and its excess risks."""
    # Single-argument, parenthesised print calls behave identically as a
    # Python 2 print statement and as the Python 3 print function.
    print("\n")
    print(header)
    emp_risk_minimizer = gen_emprisk_minimizer(boundaries, x_values, labels)
    print("t: " + str(emp_risk_minimizer))
    print("excess risk (formula): %.3f" % excess_risk_from_formula(emp_risk_minimizer))
    print("excess risk (discrete):" + str(excess_risk_discrete(x_values, emp_risk_minimizer)))


if __name__ == "__main__":
    n = 100
    # linspace yields exactly n evenly spaced points with the endpoint exactly
    # 1.0; arange with a float step depends on rounding for both its length
    # and its last value.
    x_values = np.linspace(0.0, 1.0, n)
    labels = gen_points(x_values)

    # F: every sample point is a candidate boundary.
    _report("######### F ##########", x_values, x_values, labels)

    # F_Q: Q + 1 evenly spaced candidate boundaries 0, 1/Q, ..., 1.
    for Q in (3, 20, 100):
        boundaries = np.linspace(0.0, 1.0, Q + 1)
        _report("###### F_Q(%d) #######" % Q, boundaries, x_values, labels)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment