/* A logistic regression algorithm for binary classification implemented using Newton's method and
 * a Wolfe condition based inexact line-search.
 * created by Alrecenk for inductivebias.com May 2014
 */
public class LogisticRegressionSimple {
    double w[] ; //the weights for the logistic regression
    int degree ; //degree of polynomial used for preprocessing
    //preprocessed list of input/output used for calculating error and its gradients
    double input[][];
    double output[] ;
    //these evaluation counters increment on every call to error, gradient, and hessian respectively
    public int feval, geval, heval;
    //create a logistic regression for binary classification on the given input and output
    //with polynomial expansion of the given degree
    public LogisticRegressionSimple(double in[][], boolean out[], int degree){
        this.degree = degree ;
        input = new double[in.length][];
        output = new double[out.length];
        for(int k=0;k<in.length;k++){
            input[k] = polynomial(in[k],degree);
        }
        int postotal = 0, negtotal = 0 ;
        double pos[] = new double[input[0].length] ;
        double neg[] = new double[input[0].length] ;
        //get totals for negative and positive points
        for(int k=0;k<in.length;k++){
            if(out[k]){
                output[k] = 1 ;
                pos = add(pos,input[k]) ;
                postotal++;
            }else{
                neg = add(neg,input[k]) ;
                negtotal++;
            }
        }
        //for the non-polynomial case use starting weights pointing from the centroid of the negatives to the centroid of the positives
        if(postotal >= 1 && negtotal >= 1 && degree == 1){
            double pp=0, pn=0, nn=0 ;
            for(int k=0;k<pos.length;k++){
                pos[k] /= postotal ;//scale totals to get the center of each class
                neg[k] /= negtotal ;
                pp += pos[k]*pos[k] ;
                pn += pos[k]*neg[k] ;//calculate the relevant dot products
                nn += neg[k]*neg[k] ;
            }
            //assuming w = alpha * (poscenter - negcenter) with the bias b in the last slot,
            //solve for alpha and b so that poscenter returns 0.75 and negcenter returns 0.25
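            //since w . poscenter = alpha*(pp - pn) and w . negcenter = alpha*(pn - nn),
            //those two conditions form the 2x2 linear system
            //    alpha*(pp - pn) + b = sinv(0.75)
            //    alpha*(pn - nn) + b = sinv(0.25)
            //which lineIntersection solves below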
            double alphab[] = lineIntersection(pp-pn, 1, sinv(0.75), pn-nn, 1, sinv(0.25)) ;
            w = new double[input[0].length] ;
            for(int k=0;k<w.length-1;k++){
                w[k] = alphab[0] * (pos[k] - neg[k]) ; //alpha * (pos - neg)
            }
            w[w.length-1] = alphab[1]; //bias is on the end of w
        }else{
            w = new double[pos.length] ;
        }
        //run Newton's method to get locally optimal weights
        w = newtonMethod(w, 0.00001 * input.length, 1000000) ;
        //dump the data after the curve fitting is complete
        input = null ;
        output = null ;
    }
    //applies the logistic regression to predict a new point's probability of being in the positive class
    public double apply(double i[]){
        return s( dot(w, polynomial(i,degree)) ) ;
    }
    //returns the error of a logistic regression with weights w on the given input and output
    //output should be in the form 0 for negative, 1 for positive
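    //squared error is used rather than log-loss: E(w) = sum over k of ( s(w . x_k) - y_k )^2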
    public double error(double w[]){
        feval++;//keep track of how many times this has been called
        double error = 0 ;
        for(int k=0;k<input.length;k++){
            double diff = s( dot(w,input[k]) ) - output[k] ;
            error += diff*diff ;
        }
        return error ;
    }
    //returns the gradient of error with respect to weights
    //for a logistic regression with weights w on the given input and output
    //output should be in the form 0 for negative, 1 for positive
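    //dE/dw_j = sum over k of 2 * ( s(w . x_k) - y_k ) * s'(w . x_k) * x_kj, by the chain rule on the error above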
    public double[] gradient(double w[]){
        geval++;//keep track of how many times this has been called
        double g[] = new double[w.length] ;
        for(int k=0;k<input.length;k++){
            double dot = dot(w,input[k]) ;
            double coef = 2 * ( s(dot) - output[k] ) * ds(dot) ;
            for(int j=0;j<g.length;j++){
                g[j] += input[k][j] * coef ;
            }
        }
        return g ;
    }
    //returns a numerically calculated gradient - an approximation to the above
    //used only for unit testing gradient, not called in the final version
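    //central difference: g_j ~= ( E(w + epsilon*e_j) - E(w - epsilon*e_j) ) / (2*epsilon)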
    public double[] numericalGradient(double w[], double epsilon){
        double g[] = new double[w.length] ;
        for(int j=0;j<g.length;j++){
            w[j] += epsilon ;
            g[j] = error(w) ;
            w[j] -= 2*epsilon ;
            g[j] -= error(w) ;
            w[j] += epsilon ;
            g[j] /= 2*epsilon ;
        }
        return g ;
    }
    //returns the hessian (gradient of the gradient) of error with respect to weights
    //for a logistic regression with weights w on the given input and output
    //output should be in the form 0 for negative, 1 for positive
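    //H_jl = sum over k of 2 * ( s'(w . x_k)^2 + s''(w . x_k) * ( s(w . x_k) - y_k ) ) * x_kj * x_kl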
    public double[][] hessian(double w[]){
        heval++;//keep track of how many times this has been called
        double h[][] = new double[w.length][] ;
        //second derivative matrices are always symmetric so we only need the lower triangular portion
        for(int j=0;j<h.length;j++){
            h[j] = new double[j+1] ;
        }
        for(int k=0;k<input.length;k++){
            //calculate the coefficient
            double dot = dot(w,input[k]) ;
            double ds = ds(dot) ;
            double coef = 2 * ( ds*ds + dds(dot) * (s(dot) - output[k]) ) ;
            for(int j=0;j<h.length;j++){//add x * x^T * coef to the hessian
                for(int l=0;l<=j;l++){
                    h[j][l] += input[k][j]*input[k][l] * coef ;
                }
            }
        }
        return h ;
    }
    //returns a numerically calculated hessian - an approximation to the above
    //used only for unit testing hessian, not called in the final version
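    //central difference on the gradient: row j ~= ( gradient(w + epsilon*e_j) - gradient(w - epsilon*e_j) ) / (2*epsilon)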
    public double[][] numericalHessian(double w[], double epsilon){
        double h[][] = new double[w.length][] ;
        for(int j=0;j<h.length;j++){
            w[j] += epsilon ;
            h[j] = gradient(w) ;
            w[j] -= 2*epsilon ;
            h[j] = subtract(h[j], gradient(w)) ;
            w[j] += epsilon ;
            for(int k=0;k<w.length;k++){
                h[j][k] /= 2*epsilon ;
            }
        }
        return h ;
    }
    //sigmoid/logistic function, computed in a numerically stable form
    //(the naive exp(x)/(exp(x)+1) overflows to NaN for large positive x)
    public static double s(double x){
        if(x >= 0){
            return 1 / (1 + Math.exp(-x)) ;
        }else{
            double ex = Math.exp(x);
            return ex / (ex+1) ;
        }
    }
    //derivative of the sigmoid/logistic function: s'(x) = s(x) * (1 - s(x))
    public static double ds(double x){
        double sx = s(x);
        return sx * (1-sx) ;
    }
    //second derivative of the sigmoid/logistic function: s''(x) = s(x) * (1 - s(x)) * (1 - 2*s(x))
    public static double dds(double x){
        double sx = s(x);
        return sx * (1-sx) * (1-2*sx) ;
    }
    //inverse of the sigmoid/logistic function (the logit), e.g. sinv(0.75) = ln(3)
    public static double sinv(double y){
        return Math.log( y / (1-y) ) ;
    }
    //starting from w0, searches for a weight vector using gradient descent
    //and a Wolfe condition line-search until the gradient magnitude is below tolerance
    //or a maximum number of iterations is reached
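    //update rule: w <- w - alpha * grad E(w), with alpha chosen by the Wolfe line-search below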
    public double[] gradientDescent(double w0[], double tolerance, int maxiter){
        double w[] = w0 ;
        double gradient[] = gradient(w0) ;
        int iteration = 0 ;
        while(Math.sqrt(dot(gradient,gradient)) > tolerance && iteration < maxiter){
            iteration++ ;
            //calculate the step-size in the direction of the negative gradient
            double alpha = stepSize(this, w, scale(gradient,-1), 1, 500, 0.1, 0.9) ;
            w = add( w, scale( gradient, -alpha)) ; //apply the step
            gradient = gradient(w) ; //get the new gradient
        }
        return w ;
    }
    //starting from w0, searches for a weight vector using Newton's method
    //and a Wolfe condition line-search until the gradient magnitude is below tolerance
    //or a maximum number of iterations is reached
    public double[] newtonMethod(double w0[], double tolerance, int maxiter){
        double w[] = w0 ;
        double gradient[] = gradient(w0) ;
        int iteration = 0 ;
        while(Math.sqrt(dot(gradient,gradient)) > tolerance && iteration < maxiter){
            iteration++;
            //get the second derivative matrix
            double hessian[][] = hessian(w) ;
            //perform an LDL decomposition and substitution to solve the system of equations Hd = -g for the Newton step
            //(see the Linear Least Squares article on Inductive Bias for details on this technique)
            //calculate the LDL decomposition in place with D over top of the diagonal
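            //with H = L*D*L^T the system L*D*L^T * d = -g is solved in three stages below:
            //forward substitution L*z = -g, diagonal scaling D*y = z, backward substitution L^T * d = y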
            for (int j = 0; j < w.length; j++){
                for (int k = 0; k < j; k++){//Dj starts as Hjj then subtracts
                    hessian[j][j] -= hessian[j][k] * hessian[j][k] * hessian[k][k];//Ljk^2 * Dk
                }
                for (int i = j + 1; i < w.length; i++){//L over the lower diagonal
                    for (int k = 0; k < j; k++){//Lij starts as Hij then subtracts
                        hessian[i][j] -= hessian[i][k] * hessian[j][k] * hessian[k][k];//Lik * Ljk * Dk
                    }
                    hessian[i][j] /= hessian[j][j];//divide Lij by Dj
                }
            }
            //check that the D elements are all positive to make sure the Hessian was positive definite and the Newton step goes to a minimum
            boolean positivedefinite = true ;
            for(int k=0;k<w.length&&positivedefinite;k++){
                positivedefinite &= hessian[k][k] > 0 ;
            }
            //the right hand side for Newton's method is the negative gradient
            double[] newton = scale(gradient,-1);
            if(positivedefinite){ //if H was positive definite then get the Newton direction, otherwise leave it as -gradient
                //in-place forward substitution with L
                for (int j = 0; j < w.length; j++){
                    for (int i = 0; i < j; i++){
                        newton[j] -= hessian[j][i] * newton[i];
                    }
                }
                //multiply by the inverse of the D matrix
                for (int k = 0; k < w.length; k++){//the inverse of a diagonal
                    newton[k] /= hessian[k][k];//is 1 / each element
                }
                //in-place backward substitution with L^T
                for (int j = w.length - 1; j >= 0; j--){
                    for (int i = j + 1; i < w.length; i++){
                        newton[j] -= hessian[i][j] * newton[i];
                    }
                }
            }
            //calculate the step-size
            double alpha = stepSize(this, w, newton, 1, 500, 0.001, 0.9) ;
            //apply the step
            w = add( w, scale( newton, alpha)) ;
            //calculate the gradient for the exit condition and the next step
            gradient = gradient(w) ;
        }
        return w ;
    }
    //performs a binary search for a step size satisfying the Wolfe conditions
    //returns alpha such that the next x should be x0 + alpha*d
    //guarantees convergence as long as the search direction is bounded away from being orthogonal to the gradient
    //x0 is the starting point, d is the search direction, alpha is the starting step size, maxit is the maximum number of iterations
    //c1 and c2 are the constants of the Wolfe conditions (0.1 and 0.9 can work)
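    //sufficient decrease (Armijo): f(x0 + alpha*d) <= f(x0) + c1 * alpha * (grad f(x0) . d)
    //curvature: the slope (grad f(x0 + alpha*d) . d) along d must be sufficiently flatter than the initial slope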
    public static double stepSize(LogisticRegressionSimple problem, double[] x0, double[] d, double alpha, int maxit, double c1, double c2){
        //get the error and gradient at the starting point
        double fx0 = problem.error(x0);
        double gx0 = dot(problem.gradient(x0), d);
        //bound the solution
        double alphaL = 0;
        double alphaR = 10000;
        for (int iter = 1; iter <= maxit; iter++){
            double[] xp = add(x0, scale(d, alpha)); //get the point at this alpha
            double erroralpha = problem.error(xp); //get the error at that point
            if (erroralpha >= fx0 + alpha * c1 * gx0){ //if the error is not sufficiently reduced
                alphaR = alpha;//move halfway between the current alpha and the lower bound
                alpha = (alphaL + alphaR)/2.0;
            }else{//if the error is sufficiently decreased
                double slopealpha = dot(problem.gradient(xp), d); //then get the slope along the search direction
                if (slopealpha <= c2 * Math.abs(gx0)){ //if the slope is sufficiently close to 0
                    return alpha;//then this is an acceptable point
                }else if (slopealpha >= c2 * gx0){ //if the slope is too steep and positive then go to the left
                    alphaR = alpha;//move halfway between the current alpha and the lower bound
                    alpha = (alphaL + alphaR)/2;
                }else{//if the slope is too steep and negative then go to the right of this alpha
                    alphaL = alpha;//move halfway between the current alpha and the upper bound
                    alpha = (alphaL + alphaR)/2;
                }
            }
        }
        //if we ran out of iterations then return the best alpha found
        return alpha;
    }
    //dot product
    public static double dot(double[] a, double[] b){
        double c = 0;
        for (int k = 0; k < a.length; k++){
            c += a[k] * b[k];
        }
        return c;
    }
    //returns the vector a scaled by s
    public static double[] scale(double[] a, double s){
        double[] b = new double[a.length];
        for (int k = 0; k < a.length; k++){
            b[k] = a[k] * s;
        }
        return b;
    }
    //returns the sum of two vectors
    public static double[] add(double[] a, double[] b){
        double[] c = new double[a.length];
        for (int k = 0; k < a.length; k++){
            c[k] = a[k] + b[k];
        }
        return c;
    }
    //returns the difference of two vectors
    public static double[] subtract(double[] a, double[] b){
        double[] c = new double[a.length];
        for (int k = 0; k < a.length; k++){
            c[k] = a[k] - b[k];
        }
        return c;
    }
    //creates a new input vector consisting of a constant 1 and each input raised to integer powers up to degree
    //when called with degree=1 this simply appends a bias value to the input vector,
    //otherwise it creates a separable polynomial of the given degree
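    //e.g. polynomial(new double[]{x, y}, 2) returns {x, x^2, y, y^2, 1} - no cross terms are generated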
    public static double[] polynomial(double[] input, int degree){
        double[] output = new double[1 + input.length * degree];
        int i = 0, k, j;
        for (k = 0; k < input.length; k++){//each input
            for (j = 1; j <= degree; j++){//raised to each power
                output[i] = Math.pow(input[k], j);
                i++;
            }
        }
        output[i] = 1; //constant term
        return output;
    }
    //returns the intersection of 2D lines given in standard form
    //a1*x + b1*y = c1 and a2*x + b2*y = c2
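    //solved with Cramer's rule; returns null for parallel lines, where the determinant is zero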
    public static double[] lineIntersection(double a1, double b1, double c1, double a2, double b2, double c2){
        double det = a1*b2 - a2*b1 ;
        if( det == 0){
            return null ;
        }else{
            return new double[]{ (c1*b2 - b1*c2)/det, (a1*c2 - c1*a2)/det } ;
        }
    }
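    //Illustrative usage sketch (not part of the original gist): fits a 1D toy problem where
    //the classes overlap slightly, then prints the model's predictions and evaluation counts.
    public static void main(String[] args){
        double in[][] = { {-2}, {-1}, {0.2}, {-0.2}, {1}, {2} };
        boolean out[] = { false, false, false, true, true, true };
        LogisticRegressionSimple model = new LogisticRegressionSimple(in, out, 1);
        for(double x = -2; x <= 2; x += 0.5){
            System.out.println("P(positive | x = " + x + ") = " + model.apply(new double[]{x}));
        }
        System.out.println("error evals: " + model.feval + ", gradient evals: " + model.geval + ", hessian evals: " + model.heval);
    }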
}