Created September 15, 2018 15:07
Overcommented File Explaining and Implementing Linear Regression
// This may or may not help you understand what's going
// on in week one of the Stanford Machine Learning course,
// particularly if you're coming from JavaScript programming.
// The lecture deals with linear regression. You may remember
// a concept called "line of best fit" from middle school.
// A refresher: you have a set of points with x and y co-ordinates,
// and you have to draw a straight line through them. It doesn't
// have to intersect all (or any) of the points, but it does need
// to represent the general layout of your dataset. The idea is
// that this will let you make guesses for the y position of a
// new point when you only know the x, or vice versa. It often
// won't be 100% accurate, but it'll give you a good idea, and a
// good understanding of the range of possible error it could have
// made.
// A linear regression algorithm is just a way to make a computer
// figure out what the line of best fit for a given dataset is.
// The linear regression algorithm I describe below comes from
// the Stanford Machine Learning course on Coursera. I found it
// to be a bit too mathsy, so I'm writing it out in code, which
// may help some people understand.
// Linear regression algorithms return a function that takes an
// x and returns a guess for the corresponding y value, based on
// the *specific* dataset you trained against. This function is
// called a "model". There are many other kinds of models for
// various other kinds of machine learning algorithms, but because
// we're focusing on linear regression, this model will be the
// function of a line. You may remember this as `y = mx + c`, where
// `m` is the slope of the line, and `c` is the vertical offset of
// the line, which you can think of as 'the value of the equation
// when x is 0'. In the course, this is written as hθ(x) = θ1 + θ2*x.
(()=>{
// You'll need to make lots of linear functions, so
// let's make that easy.
function prepareLinearFunction (theta1, theta2) {
  return x => theta1 + theta2 * x;
}
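// As a quick, standalone illustration (the `makeLine` name below is
// just for this example - it's a local copy of the factory above, so
// the snippet runs on its own): the line y = 1 + 2x maps x = 3 to 7.
const makeLine = (t1, t2) => x => t1 + t2 * x;
const exampleLine = makeLine(1, 2);
console.log(exampleLine(3)); // 7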
// We're assuming in this example that your dataset contains
// two columns, and therefore we have two thetas. You could
// generalise the above function to take more columns into
// account, but for now, let's focus on just two columns.
// Let's define a dataset to learn from. It is possible to
// accurately model this dataset with a line, so we should
// end up without any errors.
const dataset = [
  [0, 1],
  [1, 3],
  [2, 5],
  [3, 7]
];
// You'll notice that the 0th element of each inner array is
// the index of that element. I could just use the index,
// but I wanted to make it more obvious. (Every row here lies
// exactly on the line y = 1 + 2x, so the thetas we expect to
// learn are 1 and 2.)
// Next you'll need a "cost function", which is just a
// fancy name for a function that measures how well a
// model is performing at its task. To calculate this,
// the function needs both of the current thetas, and
// the dataset it's training against.
function costFunction(theta1, theta2, dataset) {
  // Here we generate our model using the function we
  // wrote earlier, giving it our current theta values.
  const model = prepareLinearFunction(theta1, theta2);
  // We apply a procedure to our model for each row
  // in the dataset.
  return dataset.map((x) => {
    // First we get the difference between what the
    // data row says about the mapping and what our
    // model says.
    const difference = model(x[0]) - x[1];
    // We then return the square of that difference.
    return Math.pow(difference, 2);
  }) // Now we have an array of the square of the difference
  // between each of the examples and what we predicted for
  // their x value. The following line sums that array, which
  // we divide by twice the length of the dataset (the
  // conventional 1/(2m) scaling for this cost), and return.
  .reduce((x, y) => x + y, 0) / (2 * dataset.length);
}
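// As a standalone check of the cost function above (`costDemoData` is
// a local copy of the dataset, so this snippet runs on its own): the
// all-zero model predicts 0 everywhere, the squared errors against our
// rows are 1, 9, 25 and 49, and with the 1/(2m) scaling the cost is
// their sum divided by twice the row count: 84 / 8 = 10.5.
const costDemoData = [[0, 1], [1, 3], [2, 5], [3, 7]];
const zeroModelCost = costDemoData
  .map(([x, y]) => Math.pow(0 - y, 2))
  .reduce((sum, sq) => sum + sq, 0) / (2 * costDemoData.length);
console.log(zeroModelCost); // 10.5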
// Ok, now we have a cost function. Next up, we implement
// "gradient descent", which is just a procedure for modifying
// our theta values in a way that will decrease the overall "cost"
// (otherwise known as "error") of our model.
function gradientDescent(learningRate, thetaOne, thetaTwo, dataset) {
  // First, we calculate the error of the model that our current
  // values of theta create.
  const error = costFunction(thetaOne, thetaTwo, dataset);
  // Then we use the learning rate and the slope of the cost
  // function to update each theta. Thetas must be updated
  // simultaneously - don't get into a situation where you update
  // one and use that new theta to update your next theta. It
  // would probably work, but it's not proper gradient descent.
  /*const newThetas = [thetaOne, thetaTwo].map(function(theta){
    // Note - I don't know how to calculate the derivative of the
    // cost function yet. It wasn't covered in this lecture. It's a
    // measure of how far away your thetas are from a "local minimum"
    // in error; it's what guides your thetas towards finding their
    // correct values.
    theta * learningRate * derivative * error;
  });*/
  // Please excuse this hackery nonsense. (In fact, `a` and `b` below
  // are exactly the partial derivatives of the cost function with
  // respect to thetaOne and thetaTwo: the mean of the residuals, and
  // the mean of the residuals weighted by x.)
  const model = prepareLinearFunction(thetaOne, thetaTwo);
  const a = dataset.map((x) => {
    return model(x[0]) - x[1];
  }).reduce((x, y) => x + y, 0) / dataset.length;
  const b = dataset.map((x) => {
    return (model(x[0]) - x[1]) * x[0];
  }).reduce((x, y) => x + y, 0) / dataset.length;
  const newThetas = [
    thetaOne - learningRate * a,
    thetaTwo - learningRate * b
  ];
  return { newThetas, error };
}
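// One descent step, written out standalone (local copies of the
// pieces above, so it runs on its own): starting from thetas (0, 0)
// with learning rate 0.2, the residuals are -1, -3, -5, -7, so both
// gradients are negative and a single step moves both thetas up,
// toward the true values (1, 2).
const stepDemoData = [[0, 1], [1, 3], [2, 5], [3, 7]];
const stepModel = x => 0 + 0 * x; // the (0, 0) model predicts 0 everywhere
const rowCount = stepDemoData.length;
const gradOne = stepDemoData.reduce((sum, [x, y]) => sum + (stepModel(x) - y), 0) / rowCount;
const gradTwo = stepDemoData.reduce((sum, [x, y]) => sum + (stepModel(x) - y) * x, 0) / rowCount;
const steppedThetas = [0 - 0.2 * gradOne, 0 - 0.2 * gradTwo];
console.log(steppedThetas); // approximately [0.8, 1.7]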
// Let's run through the whole training process.
(function linearRegress(dataset){
  // Initialize thetas to zero. It's not important that
  // they're zero; they could start out as any number,
  // but zero works just fine.
  const initialThetaOne = 0;
  const initialThetaTwo = 0;
  const learningRate = 0.2;
  // Create a named function so that you can recurse
  // through the values of theta that gradient descent gives
  // you until you converge.
  (function reg(learningRate, thetaOne, thetaTwo, dataset) {
    console.log(`Current thetas: ${thetaOne}, ${thetaTwo}`);
    // Run gradient descent.
    const {error, newThetas} = gradientDescent(learningRate, thetaOne, thetaTwo, dataset);
    console.log(`Those thetas gave us an error of ${error}.`);
    if (error < 0.00001) {
      // Our error is essentially zero! That means our thetas have
      // "converged", or "settled", on values that make sense
      // for the dataset. Note that because our dataset fits
      // perfectly on a line, we can check that our thetas
      // match exactly. When your dataset is always going to
      // have a certain amount of error in it (i.e. when the
      // data doesn't exactly match any straight line), you'll
      // have to tolerate some difference.
      console.log(`Convergence! Line of best fit is: y = ${thetaOne} + ${thetaTwo}x.`);
      process.exit();
    }
    // Recursively call this function with updated thetas until
    // convergence happens and the condition above is met.
    console.log("Running gradient descent again to reduce the error.");
    reg(learningRate, newThetas[0], newThetas[1], dataset);
  })(learningRate, initialThetaOne, initialThetaTwo, dataset);
})(dataset);
})();
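// The whole training loop above can also be restated iteratively
// rather than recursively. This is a standalone sketch with local
// copies of each piece (dataset, model, cost, gradients), not the
// file's own functions, so it runs on its own:
const loopData = [[0, 1], [1, 3], [2, 5], [3, 7]];
const loopRate = 0.2;
let t1 = 0;
let t2 = 0;
let loopError = Infinity;
while (loopError >= 0.00001) {
  // The current model, its cost, and the two gradients, computed
  // the same way as in the functions above.
  const h = x => t1 + t2 * x;
  loopError = loopData
    .map(([x, y]) => Math.pow(h(x) - y, 2))
    .reduce((sum, sq) => sum + sq, 0) / (2 * loopData.length);
  const g1 = loopData.reduce((sum, [x, y]) => sum + (h(x) - y), 0) / loopData.length;
  const g2 = loopData.reduce((sum, [x, y]) => sum + (h(x) - y) * x, 0) / loopData.length;
  t1 = t1 - loopRate * g1;
  t2 = t2 - loopRate * g2;
}
console.log(`y = ${t1} + ${t2}x`); // close to y = 1 + 2x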