Skip to content

Instantly share code, notes, and snippets.

@sharnett
Created June 15, 2013 23:16
Show Gist options
  • Save sharnett/5789979 to your computer and use it in GitHub Desktop.
Newton's method for beta-binomial maximum likelihood estimation
package Newton;
import org.apache.commons.math3.special.Gamma;
/**
* User: srharnett
* Date: 6/15/13
* Time: 6:08 PM
*/
/**
 * Regularized beta-binomial likelihood for maximum likelihood estimation.
 * Observation i has n[i] trials (sessions) and k[i] successes (leads); the
 * optimization variables are the beta parameters (a, b), packed as x = {a, b}.
 * f/gradient/hessian expose the NEGATIVE log-likelihood plus a quadratic
 * penalty, suitable for a minimizer such as the Newton solver in this gist.
 *
 * User: srharnett
 * Date: 6/15/13
 * Time: 6:08 PM
 */
public class CVRLikelihood {
    private double[] leads;    // successes k[i] per observation
    private int[] sessions;    // trials n[i] per observation
    private static double lambda = 1e-6;      // regularization weight
    private static double[] xstart = {3, 17}; // default starting point (a, b)

    public CVRLikelihood(double[] leads, int[] sessions) {
        this.leads = leads;
        this.sessions = sessions;
    }

    // Short aliases: log-gamma and its first (digamma) and second (trigamma) derivatives.
    private static double lg(double x) { return Gamma.logGamma(x); }
    private static double dg(double x) { return Gamma.digamma(x); }
    private static double tg(double x) { return Gamma.trigamma(x); }

    /**
     * Beta-binomial log-likelihood of the data at (a, b).
     * Returns -infinity outside the feasible region a&gt;0, b&gt;0 so that a line
     * search naturally rejects infeasible iterates. Binomial coefficients are
     * omitted — they are constant in (a, b) and do not affect the argmax.
     */
    public double logLikelihood(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        if (a <= 0 || b <= 0)
            return Double.NEGATIVE_INFINITY;
        double total = 0;
        for (int i = 0; i < k.length; i++)
            total += lg(a+b) - lg(a) - lg(b) + lg(a+k[i]) + lg(b+n[i]-k[i]) - lg(a+b+n[i]);
        return total;
    }

    /** Gradient {dL/da, dL/db} of the log-likelihood, via digamma. */
    public double[] logLikelihoodGradient(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        double Da = 0, Db = 0;
        for (int i = 0; i < k.length; i++) {
            Da += dg(a+b) - dg(a) + dg(a+k[i]) - dg(a+b+n[i]);
            Db += dg(a+b) - dg(b) + dg(b+n[i]-k[i]) - dg(a+b+n[i]);
        }
        return new double[]{Da, Db};
    }

    /** Symmetric 2x2 Hessian of the log-likelihood, via trigamma. */
    public double[][] logLikelihoodHessian(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        double Daa = 0, Dab = 0, Dbb = 0;
        for (int i = 0; i < k.length; i++) {
            Daa += tg(a+b) - tg(a) + tg(a+k[i]) - tg(a+b+n[i]);
            Dbb += tg(a+b) - tg(b) + tg(b+n[i]-k[i]) - tg(a+b+n[i]);
            Dab += tg(a+b) - tg(a+b+n[i]);
        }
        return new double[][]{{Daa, Dab}, {Dab, Dbb}};
    }

    /** Quadratic penalty lambda*(a + b - 30)^2 keeping a+b near 30. */
    public double regularization(double[] x) {
        return lambda * Math.pow(x[0] + x[1] - 30, 2);
    }

    /**
     * Gradient of the penalty. Both partials of lambda*(a+b-30)^2 equal
     * 2*lambda*(a+b-30). (Bug fix: the previous version returned
     * {2*lambda*a, 2*lambda*b}, the gradient of lambda*(a^2+b^2), which did
     * not match regularization().)
     */
    public double[] regularizationGradient(double[] x) {
        double g = 2 * lambda * (x[0] + x[1] - 30);
        return new double[]{g, g};
    }

    /**
     * Hessian of the penalty: every second partial of lambda*(a+b-30)^2 is
     * 2*lambda, so the matrix is constant with all four entries 2*lambda.
     * (Bug fix: the previous version had zero off-diagonals and a spurious
     * x[1] factor, making it asymmetric and inconsistent with
     * regularization().)
     */
    public double[][] regularizationHessian(double[] x) {
        double h = 2 * lambda;
        return new double[][]{{h, h}, {h, h}};
    }

    /** Objective to minimize: negative log-likelihood plus penalty. */
    public double f(double[] x) {
        return -logLikelihood(x[0], x[1]) + regularization(x);
    }

    /** Gradient of f: negated likelihood gradient plus penalty gradient. */
    public double[] gradient(double[] x) {
        double[] G = logLikelihoodGradient(x[0], x[1]);
        double[] r = regularizationGradient(x);
        return new double[]{-G[0] + r[0], -G[1] + r[1]};
    }

    /** Hessian of f: negated likelihood Hessian plus penalty Hessian. */
    public double[][] hessian(double[] x) {
        double[][] H = logLikelihoodHessian(x[0], x[1]);
        double[][] r = regularizationHessian(x);
        return new double[][]{{-H[0][0] + r[0][0], -H[0][1] + r[0][1]},
                              {-H[1][0] + r[1][0], -H[1][1] + r[1][1]}};
    }
}
package Newton;
import org.apache.commons.math3.special.Gamma;
/**
 * Small 2-variable Newton minimizer with an eigenvalue-based
 * positive-definiteness correction and a back-tracking line search.
 * Relies on f(x), gradient(x), hessian(x) and xstart being available in
 * scope (supplied by the likelihood class this gist pairs it with).
 */
public class Newton {
    private static double tol = 1e-5;  // stop when every gradient component is below tol
    private static double xtol = 1e-8; // ... or when the change in x is below xtol
    private static int maxits = 20;    // ... or after maxits iterations
    private static double mu = 1e-6;   // diagonal shift ensuring a positive definite Hessian

    // Open question from the author: how do you pass in functions? what is
    // an interface? I.e. a constructor Newton(f, gradient, hessian, xstart)
    // is wanted instead of picking these names up from the enclosing scope.

    /** Solves the 2x2 linear system A*x = rhs by Cramer's rule. */
    private static double[] linearSolve(double[][] A, double[] rhs) {
        double a = A[0][0], b = A[0][1], c = A[1][0], d = A[1][1];
        double u = rhs[0], v = rhs[1];
        double det = a * d - b * c;
        return new double[]{(d * u - b * v) / det, (a * v - c * u) / det};
    }

    /**
     * Eigenvalues of a 2x2 matrix, used to detect an indefinite Hessian.
     * For a symmetric matrix the discriminant (a-d)^2 + 4bc is nonnegative;
     * if A isn't symmetric the square root may be NaN (Java does not promote
     * to complex numbers automatically).
     */
    private double[] eig(double[][] A) {
        double a = A[0][0], b = A[0][1], c = A[1][0], d = A[1][1];
        double trace = a + d;
        double root = Math.sqrt(a * a - 2 * a * d + d * d + 4 * b * c);
        return new double[]{0.5 * (trace - root), 0.5 * (trace + root)};
    }

    /**
     * Runs the damped Newton iteration from xstart and returns the final
     * iterate. Prints a warning if maxits iterations pass without meeting
     * either convergence test.
     */
    public double[] solve() {
        double[] x0 = {xstart[0], xstart[1]};
        double[] x = {0, 0};
        double[] G = gradient(x0);
        int it;
        for (it = 0; it < maxits; it++) {
            // Shift the Hessian diagonal past its most negative eigenvalue
            // (plus mu) so it is positive definite and the Newton step is a
            // descent direction.
            double[][] H = hessian(x0);
            double[] ev = eig(H);
            double shift = Math.min(0, Math.min(ev[0], ev[1]));
            H[0][0] += mu - shift;
            H[1][1] += mu - shift;
            double[] p = linearSolve(H, G);

            // Try the full step, then halve until the objective improves and
            // the iterate stays feasible.
            // TODO: abstract away the x>0 constraint — it is specific to CVRLikelihood.
            double step = 1;
            x[0] = x0[0] - p[0];
            x[1] = x0[1] - p[1];
            while (step > 1e-16 && (f(x) - f(x0) > 0 || x[0] <= 0 || x[1] <= 0)) {
                step /= 2;
                x[0] = x0[0] - step * p[0];
                x[1] = x0[1] - step * p[1];
            }

            G = gradient(x);
            boolean smallGradient = Math.abs(G[0]) < tol && Math.abs(G[1]) < tol;
            boolean smallMove = Math.max(Math.abs(x[0] - x0[0]), Math.abs(x[1] - x0[1])) < xtol;
            if (smallGradient || smallMove)
                break;
            x0 = new double[]{x[0], x[1]};
        }
        if (it == maxits)
            System.out.println("didn't converge after maximum number of iterations");
        return x;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment