Created
June 15, 2013 23:16
-
-
Save sharnett/5789979 to your computer and use it in GitHub Desktop.
Newton's method for beta-binomial maximum likelihood estimation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package Newton; | |
import org.apache.commons.math3.special.Gamma; | |
/** | |
* User: srharnett | |
* Date: 6/15/13 | |
* Time: 6:08 PM | |
*/ | |
public class CVRLikelihood { | |
private double[] leads; | |
private int[] sessions; | |
private static double lambda = 1e-6; | |
private static double[] xstart = {3,17}; | |
public CVRLikelihood(double[] leads, int[] sessions) { | |
this.leads = leads; | |
this.sessions = sessions; | |
} | |
private static double lg(double x) { return Gamma.logGamma(x); } | |
private static double dg(double x) { return Gamma.digamma(x); } | |
private static double tg(double x) { return Gamma.trigamma(x); } | |
public double logLikelihood(double a, double b) { | |
double[] k = leads; | |
int[] n = sessions; | |
if (a<=0 || b<=0) | |
return Double.NEGATIVE_INFINITY; | |
double total = 0; | |
for (int i=0; i<k.length; i++) | |
total += lg(a+b)-lg(a)-lg(b)+lg(a+k[i])+lg(b+n[i]-k[i])-lg(a+b+n[i]); | |
return total; | |
} | |
public double[] logLikelihoodGradient(double a, double b) { | |
double[] k = leads; | |
int[] n = sessions; | |
double Da=0, Db=0; | |
for (int i=0; i<k.length; i++) { | |
Da += dg(a+b)-dg(a)+dg(a+k[i])-dg(a+b+n[i]); | |
Db += dg(a+b)-dg(b)+dg(b+n[i]-k[i])-dg(a+b+n[i]); | |
} | |
return new double[]{Da,Db}; | |
} | |
public double[][] logLikelihoodHessian(double a, double b) { | |
double[] k = leads; | |
int[] n = sessions; | |
double Daa=0, Dab=0, Dbb=0; | |
for (int i=0; i<k.length; i++) { | |
Daa += tg(a+b)-tg(a)+tg(a+k[i])-tg(a+b+n[i]); | |
Dbb += tg(a+b)-tg(b)+tg(b+n[i]-k[i])-tg(a+b+n[i]); | |
Dab += tg(a+b)-tg(a+b+n[i]); | |
} | |
return new double[][]{{Daa,Dab},{Dab,Dbb}}; | |
} | |
public double regularization(double[] x) { | |
return lambda*Math.pow(x[0]+x[1]-30, 2); | |
} | |
public double[] regularizationGradient(double[] x) { | |
return new double[]{2*lambda*x[0],2*lambda*x[1]}; | |
} | |
public double[][] regularizationHessian(double[] x) { | |
return new double[][]{{2*lambda,0},{0,2*lambda*x[1]}}; | |
} | |
public double f(double[] x) { | |
return -logLikelihood(x[0], x[1])+regularization(x); | |
} | |
public double[] gradient(double[] x) { | |
double[] G = logLikelihoodGradient(x[0], x[1]); | |
double[] r = regularizationGradient(x); | |
return new double[]{-G[0]+r[0],-G[1]+r[1]}; | |
} | |
public double[][] hessian(double[] x) { | |
double[][] H = logLikelihoodHessian(x[0], x[1]); | |
double[][] r = regularizationHessian(x); | |
return new double[][]{{-H[0][0]+r[0][0],-H[0][1]+r[0][1]},{-H[1][0]+r[1][0],-H[1][1]+r[1][1]}}; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package Newton; | |
import org.apache.commons.math3.special.Gamma; | |
public class Newton { | |
private static double tol=1e-5; // stop if largest component of gradient is less than tol | |
private static double xtol=1e-8; // or if change in optimization variable is less than xtol | |
private static int maxits = 20; // or after maxits iterations | |
private static double mu=1e-6; // Hessian positive-definiteness correction | |
// how do you pass in functions? what is an interface? | |
//public Newton(f, gradient, hessian, xstart) { | |
// | |
// } | |
// solve the 2x2 system A*x=rhs | |
private static double[] linearSolve(double[][] A, double[] rhs) { | |
double a=A[0][0], b=A[0][1], c=A[1][0], d=A[1][1]; | |
double x=rhs[0], y=rhs[1]; | |
double disc = a*d-b*c; | |
return new double[]{(d*x-b*y)/disc, (a*y-c*x)/disc}; | |
} | |
// eigenvalues for a 2x2 matrix, used to fix an indefinite Hessian. If the Hessian isn't symmetric for some reason | |
// the square root might fail (not sure if java does imaginary numbers automatically) | |
private double[] eig(double[][] A) { | |
double a=A[0][0], b=A[0][1], c=A[1][0], d=A[1][1]; | |
double temp = Math.sqrt(a*a+4*b*c-2*a*d+d*d); | |
double e1 = .5*(a+d-temp); | |
double e2 = .5*(a+d+temp); | |
return new double[]{e1, e2}; | |
} | |
public double[] solve() { | |
double[] eigenvalues; | |
double[][] H; | |
double[] p = {0,0}; | |
double[] x = {0,0}; | |
double[] x0 = {xstart[0],xstart[1]}; | |
double[] G = gradient(x0); | |
double correction; | |
double a; | |
int i; | |
for (i=0; i<maxits; i++) { | |
// to make sure the Newton direction is a descent direction, we force the Hessian to be positive definite | |
// by adding a small multiple of the identity matrix | |
H = hessian(x0); | |
eigenvalues = eig(H); | |
correction = Math.min(eigenvalues[0], eigenvalues[1]); | |
correction = Math.min(0, correction); | |
H[0][0] += mu-correction; | |
H[1][1] += mu-correction; | |
p = linearSolve(H,G); | |
a = 1; | |
x[0] = x0[0]-p[0]; | |
x[1] = x0[1]-p[1]; | |
// to make sure we improve the objective, do a back-tracking line search | |
// TODO: abstract away the x>0 constraint here, as that is specific to CVRLikelihood | |
while (a>1e-16 && (f(x)-f(x0)>0 || x[0]<=0 || x[1] <= 0)) { | |
a /= 2; | |
x[0] = x0[0]-a*p[0]; | |
x[1] = x0[1]-a*p[1]; | |
} | |
G = gradient(x); | |
if ((Math.abs(G[0])<tol && Math.abs(G[1])<tol) || Math.max(Math.abs(x[0]-x0[0]),Math.abs(x[1]-x0[1]))<xtol) | |
break; | |
x0 = new double[] {x[0],x[1]}; | |
} | |
if (i==maxits) | |
System.out.println("didn't converge after maximum number of iterations"); | |
return x; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment