Skip to content

Instantly share code, notes, and snippets.

@sharnett
Created June 15, 2013 23:16
Show Gist options
  • Save sharnett/5789979 to your computer and use it in GitHub Desktop.
Newton's method for beta-binomial maximum likelihood estimation
package Newton;
import org.apache.commons.math3.special.Gamma;
/**
* User: srharnett
* Date: 6/15/13
* Time: 6:08 PM
*/
/**
 * Regularized beta-binomial likelihood for maximum likelihood estimation.
 * Observation i has n[i] trials (sessions) and k[i] successes (leads); the
 * optimization variables are the beta parameters (a, b), packed as x = {a, b}.
 * f/gradient/hessian expose the NEGATIVE log-likelihood plus a quadratic
 * penalty, suitable for a minimizer such as the Newton solver in this gist.
 *
 * User: srharnett
 * Date: 6/15/13
 * Time: 6:08 PM
 */
public class CVRLikelihood {
    private double[] leads;    // successes k[i] per observation
    private int[] sessions;    // trials n[i] per observation
    private static double lambda = 1e-6;      // regularization weight
    private static double[] xstart = {3, 17}; // default starting point (a, b)

    public CVRLikelihood(double[] leads, int[] sessions) {
        this.leads = leads;
        this.sessions = sessions;
    }

    // Short aliases: log-gamma and its first (digamma) and second (trigamma) derivatives.
    private static double lg(double x) { return Gamma.logGamma(x); }
    private static double dg(double x) { return Gamma.digamma(x); }
    private static double tg(double x) { return Gamma.trigamma(x); }

    /**
     * Beta-binomial log-likelihood of the data at (a, b).
     * Returns -infinity outside the feasible region a&gt;0, b&gt;0 so that a line
     * search naturally rejects infeasible iterates. Binomial coefficients are
     * omitted — they are constant in (a, b) and do not affect the argmax.
     */
    public double logLikelihood(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        if (a <= 0 || b <= 0)
            return Double.NEGATIVE_INFINITY;
        double total = 0;
        for (int i = 0; i < k.length; i++)
            total += lg(a+b) - lg(a) - lg(b) + lg(a+k[i]) + lg(b+n[i]-k[i]) - lg(a+b+n[i]);
        return total;
    }

    /** Gradient {dL/da, dL/db} of the log-likelihood, via digamma. */
    public double[] logLikelihoodGradient(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        double Da = 0, Db = 0;
        for (int i = 0; i < k.length; i++) {
            Da += dg(a+b) - dg(a) + dg(a+k[i]) - dg(a+b+n[i]);
            Db += dg(a+b) - dg(b) + dg(b+n[i]-k[i]) - dg(a+b+n[i]);
        }
        return new double[]{Da, Db};
    }

    /** Symmetric 2x2 Hessian of the log-likelihood, via trigamma. */
    public double[][] logLikelihoodHessian(double a, double b) {
        double[] k = leads;
        int[] n = sessions;
        double Daa = 0, Dab = 0, Dbb = 0;
        for (int i = 0; i < k.length; i++) {
            Daa += tg(a+b) - tg(a) + tg(a+k[i]) - tg(a+b+n[i]);
            Dbb += tg(a+b) - tg(b) + tg(b+n[i]-k[i]) - tg(a+b+n[i]);
            Dab += tg(a+b) - tg(a+b+n[i]);
        }
        return new double[][]{{Daa, Dab}, {Dab, Dbb}};
    }

    /** Quadratic penalty lambda*(a + b - 30)^2 keeping a+b near 30. */
    public double regularization(double[] x) {
        return lambda * Math.pow(x[0] + x[1] - 30, 2);
    }

    /**
     * Gradient of the penalty. Both partials of lambda*(a+b-30)^2 equal
     * 2*lambda*(a+b-30). (Bug fix: the previous version returned
     * {2*lambda*a, 2*lambda*b}, the gradient of lambda*(a^2+b^2), which did
     * not match regularization().)
     */
    public double[] regularizationGradient(double[] x) {
        double g = 2 * lambda * (x[0] + x[1] - 30);
        return new double[]{g, g};
    }

    /**
     * Hessian of the penalty: every second partial of lambda*(a+b-30)^2 is
     * 2*lambda, so the matrix is constant with all four entries 2*lambda.
     * (Bug fix: the previous version had zero off-diagonals and a spurious
     * x[1] factor, making it asymmetric and inconsistent with
     * regularization().)
     */
    public double[][] regularizationHessian(double[] x) {
        double h = 2 * lambda;
        return new double[][]{{h, h}, {h, h}};
    }

    /** Objective to minimize: negative log-likelihood plus penalty. */
    public double f(double[] x) {
        return -logLikelihood(x[0], x[1]) + regularization(x);
    }

    /** Gradient of f: negated likelihood gradient plus penalty gradient. */
    public double[] gradient(double[] x) {
        double[] G = logLikelihoodGradient(x[0], x[1]);
        double[] r = regularizationGradient(x);
        return new double[]{-G[0] + r[0], -G[1] + r[1]};
    }

    /** Hessian of f: negated likelihood Hessian plus penalty Hessian. */
    public double[][] hessian(double[] x) {
        double[][] H = logLikelihoodHessian(x[0], x[1]);
        double[][] r = regularizationHessian(x);
        return new double[][]{{-H[0][0] + r[0][0], -H[0][1] + r[0][1]},
                              {-H[1][0] + r[1][0], -H[1][1] + r[1][1]}};
    }
}
package Newton;
import org.apache.commons.math3.special.Gamma;
/**
 * Small 2-variable Newton minimizer with an eigenvalue-based
 * positive-definiteness correction and a back-tracking line search.
 * Relies on f(x), gradient(x), hessian(x) and xstart being available in
 * scope (supplied by the likelihood class this gist pairs it with).
 */
public class Newton {
    private static double tol = 1e-5;  // stop when every gradient component is below tol
    private static double xtol = 1e-8; // ... or when the change in x is below xtol
    private static int maxits = 20;    // ... or after maxits iterations
    private static double mu = 1e-6;   // diagonal shift ensuring a positive definite Hessian

    // Open question from the author: how do you pass in functions? what is
    // an interface? I.e. a constructor Newton(f, gradient, hessian, xstart)
    // is wanted instead of picking these names up from the enclosing scope.

    /** Solves the 2x2 linear system A*x = rhs by Cramer's rule. */
    private static double[] linearSolve(double[][] A, double[] rhs) {
        double a = A[0][0], b = A[0][1], c = A[1][0], d = A[1][1];
        double u = rhs[0], v = rhs[1];
        double det = a * d - b * c;
        return new double[]{(d * u - b * v) / det, (a * v - c * u) / det};
    }

    /**
     * Eigenvalues of a 2x2 matrix, used to detect an indefinite Hessian.
     * For a symmetric matrix the discriminant (a-d)^2 + 4bc is nonnegative;
     * if A isn't symmetric the square root may be NaN (Java does not promote
     * to complex numbers automatically).
     */
    private double[] eig(double[][] A) {
        double a = A[0][0], b = A[0][1], c = A[1][0], d = A[1][1];
        double trace = a + d;
        double root = Math.sqrt(a * a - 2 * a * d + d * d + 4 * b * c);
        return new double[]{0.5 * (trace - root), 0.5 * (trace + root)};
    }

    /**
     * Runs the damped Newton iteration from xstart and returns the final
     * iterate. Prints a warning if maxits iterations pass without meeting
     * either convergence test.
     */
    public double[] solve() {
        double[] x0 = {xstart[0], xstart[1]};
        double[] x = {0, 0};
        double[] G = gradient(x0);
        int it;
        for (it = 0; it < maxits; it++) {
            // Shift the Hessian diagonal past its most negative eigenvalue
            // (plus mu) so it is positive definite and the Newton step is a
            // descent direction.
            double[][] H = hessian(x0);
            double[] ev = eig(H);
            double shift = Math.min(0, Math.min(ev[0], ev[1]));
            H[0][0] += mu - shift;
            H[1][1] += mu - shift;
            double[] p = linearSolve(H, G);

            // Try the full step, then halve until the objective improves and
            // the iterate stays feasible.
            // TODO: abstract away the x>0 constraint — it is specific to CVRLikelihood.
            double step = 1;
            x[0] = x0[0] - p[0];
            x[1] = x0[1] - p[1];
            while (step > 1e-16 && (f(x) - f(x0) > 0 || x[0] <= 0 || x[1] <= 0)) {
                step /= 2;
                x[0] = x0[0] - step * p[0];
                x[1] = x0[1] - step * p[1];
            }

            G = gradient(x);
            boolean smallGradient = Math.abs(G[0]) < tol && Math.abs(G[1]) < tol;
            boolean smallMove = Math.max(Math.abs(x[0] - x0[0]), Math.abs(x[1] - x0[1])) < xtol;
            if (smallGradient || smallMove)
                break;
            x0 = new double[]{x[0], x[1]};
        }
        if (it == maxits)
            System.out.println("didn't converge after maximum number of iterations");
        return x;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment