Instantly share code, notes, and snippets.

# NikolayIT/LinearRegression.cs Created Mar 17, 2017

Linear regression implementation in pure C# with example of Bulgarian population prediction
 namespace LinearRegression
 {
     using System;
     using System.Diagnostics;

     /// <summary>
     /// Simple (one-variable) least-squares linear regression, demonstrated by fitting
     /// yearly population figures for 1990-2009 and extrapolating the line to 2017.
     /// </summary>
     public static class Program
     {
         /// <summary>
         /// Fits a line to the sample data and prints the r^2 value, the intercept, the slope
         /// and the extrapolated value for the year 2017 to the console.
         /// </summary>
         public static void Main()
         {
             // x-axis: calendar years.
             var xValues = new double[]
                               {
                                   1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
                                   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
                               };

             // y-axis: population for the corresponding year.
             var yValues = new double[]
                               {
                                   8669269, 8595500, 8484900, 8459800, 8427400, 8384700, 8340900,
                                   8283200, 8230400, 8190900, 8149468, 7932984, 7845841, 7801273,
                                   7761049, 7720000, 7679290, 7640238, 7606551, 7563710
                               };

             double rSquared, intercept, slope;
             LinearRegression(xValues, yValues, out rSquared, out intercept, out slope);

             Console.WriteLine($"R-squared = {rSquared}");
             Console.WriteLine($"Intercept = {intercept}");
             Console.WriteLine($"Slope = {slope}");

             // Extrapolate the fitted line y = slope * x + intercept to the year 2017.
             var predictedValue = (slope * 2017) + intercept;
             Console.WriteLine($"Prediction for 2017: {predictedValue}");
         }

         /// <summary>
         /// Fits a line to a collection of (x,y) points using ordinary least squares.
         /// </summary>
         /// <remarks>
         /// The sums of squares are accumulated around the means (two passes over the data)
         /// instead of with the single-pass "sum of squares minus squared sum" formula, which
         /// loses most of its significant digits when the values are large compared to their
         /// spread. When the result is mathematically undefined (no points, all x values equal,
         /// or all y values equal) the affected outputs are <see cref="double.NaN"/>.
         /// </remarks>
         /// <param name="xVals">The x-axis values.</param>
         /// <param name="yVals">The y-axis values.</param>
         /// <param name="rSquared">The r^2 value of the line.</param>
         /// <param name="yIntercept">The y-intercept value of the line (i.e. y = ax + b, yIntercept is b).</param>
         /// <param name="slope">The slope of the line (i.e. y = ax + b, slope is a).</param>
         /// <exception cref="ArgumentNullException">
         /// <paramref name="xVals"/> or <paramref name="yVals"/> is null.
         /// </exception>
         /// <exception cref="ArgumentException">
         /// <paramref name="xVals"/> and <paramref name="yVals"/> have different lengths.
         /// </exception>
         public static void LinearRegression(
             double[] xVals,
             double[] yVals,
             out double rSquared,
             out double yIntercept,
             out double slope)
         {
             if (xVals == null)
             {
                 throw new ArgumentNullException(nameof(xVals));
             }

             if (yVals == null)
             {
                 throw new ArgumentNullException(nameof(yVals));
             }

             if (xVals.Length != yVals.Length)
             {
                 throw new ArgumentException("Input values should be with the same length.");
             }

             var count = xVals.Length;

             // First pass: the means of both series.
             double sumOfX = 0;
             double sumOfY = 0;
             for (var i = 0; i < count; i++)
             {
                 sumOfX += xVals[i];
                 sumOfY += yVals[i];
             }

             var meanX = sumOfX / count;
             var meanY = sumOfY / count;

             // Second pass: sums of squared deviations and of co-deviations around the means.
             double ssX = 0;
             double ssY = 0;
             double sCo = 0;
             for (var i = 0; i < count; i++)
             {
                 var deviationX = xVals[i] - meanX;
                 var deviationY = yVals[i] - meanY;
                 ssX += deviationX * deviationX;
                 ssY += deviationY * deviationY;
                 sCo += deviationX * deviationY;
             }

             slope = sCo / ssX;
             yIntercept = meanY - (slope * meanX);

             // Pearson correlation coefficient; the square roots are taken separately so the
             // product ssX * ssY cannot overflow before the root is applied.
             var r = sCo / (Math.Sqrt(ssX) * Math.Sqrt(ssY));
             rSquared = r * r;
         }
     }
 }

### varshithabk commented Oct 10, 2018

 Thank you for this code!!

### diegogazzolo commented May 21, 2019

 Thanks a lot!

### Eibwen commented May 28, 2019

 @SIMOMEGA, it's fairly standard concepts for statistics; I'm not sure how much documentation really can be added to the code here. This might help even though it has lots of math notation: https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example

 For my usage I did quickly convert it to returning an object, which I thought I'd share:

```csharp
public class LinearRegressionComponents
{
    /// The r^2 value of the line. Used to give an idea of the accuracy given the input values
    public double rSquared { get; set; }

    /// The y-intercept value of the line (i.e. y = ax + b, yIntercept is b).
    public double yIntercept { get; set; }

    /// The slope of the line (i.e. y = ax + b, slope is a).
    public double slope { get; set; }

    public double CalculatePrediction(double input)
    {
        return (input * slope) + yIntercept;
    }
}
```

### alexandruionescu commented Jan 7, 2020

 Cheers for the code snippet. The "ssY" variable does not seem to be used anywhere?