Skip to content

Instantly share code, notes, and snippets.

@leocb
Last active August 29, 2023 00:52
Show Gist options
  • Save leocb/f32303b0eaf8ab0efa0388f5f966335f to your computer and use it in GitHub Desktop.
Save leocb/f32303b0eaf8ab0efa0388f5f966335f to your computer and use it in GitHub Desktop.
Simple C# Linear Regression Class
// Based on https://gist.github.com/NikolayIT/d86118a3a0cb3f5ed63d674a350d75f2
// With some tweaks
// This file is licensed under MIT license
/// <summary>
/// Simple ordinary least-squares linear regression over paired samples,
/// fitting <c>y = Slope * x + YIntercept</c>.
/// Call <see cref="SetValues"/> to (re)compute every result property.
/// </summary>
public class LinearRegression
{
    /// <summary>
    /// Minimum number of (x, y) pairs required by <see cref="SetValues"/>.
    /// Shorter input leaves every result at its default value.
    /// </summary>
    public static double MinimumLength { get; } = 3;

    /// <summary>Arithmetic mean of the y values. Defaults to 0.</summary>
    public double Average { get; private set; } = 0;

    /// <summary>
    /// Coefficient of determination (square of the Pearson correlation coefficient).
    /// Defaults to 1. Not finite when either the x values or the y values have zero variance.
    /// </summary>
    public double R2 { get; private set; } = 1;

    /// <summary>
    /// Slope of the fitted line. Defaults to 0. Not finite when all x values are identical.
    /// </summary>
    public double Slope { get; private set; } = 0;

    /// <summary>Sample standard deviation (n - 1 denominator) of the y values. Defaults to 0.</summary>
    public double StdDev { get; private set; } = 0;

    /// <summary>
    /// Residual standard error of the fit: sqrt(sum of squared residuals / <see cref="DegreesOfFreedom"/>).
    /// Defaults to <see cref="double.NaN"/>.
    /// </summary>
    public double ResidualStdError { get; private set; } = double.NaN;

    /// <summary>Y value of the fitted line at x = 0. Defaults to 0.</summary>
    public double YIntercept { get; private set; } = 0;

    /// <summary>Residual degrees of freedom (sample count - 2). Defaults to 0.</summary>
    public double DegreesOfFreedom { get; private set; } = 0;

    /// <summary>
    /// Set values and calculate the results for this linear regression.
    /// All result properties are first reset to their defaults; if the arrays differ in length
    /// or hold fewer than <see cref="MinimumLength"/> samples, the method returns with the defaults in place.
    /// </summary>
    /// <param name="xs">Known x values, must be the same length as ys</param>
    /// <param name="ys">Known y values, must be the same length as xs</param>
    /// <param name="useSecondPass">Set to true (default) to execute a second pass over the data that
    /// recomputes <see cref="ResidualStdError"/> directly from the individual residuals. When false,
    /// <see cref="ResidualStdError"/> is derived algebraically from the first-pass sums, which avoids
    /// the extra loop on large datasets but can be less accurate numerically.</param>
    /// <exception cref="ArgumentNullException"><paramref name="xs"/> or <paramref name="ys"/> is null.</exception>
    public void SetValues(double[] xs, double[] ys, bool useSecondPass = true)
    {
        // Validate before touching state so a failed call leaves the previous results intact.
        if (xs == null)
        {
            throw new ArgumentNullException(nameof(xs));
        }

        if (ys == null)
        {
            throw new ArgumentNullException(nameof(ys));
        }

        // Reset every result (including DegreesOfFreedom) so no stale value
        // from an earlier call survives an early return below.
        this.Average = 0;
        this.R2 = 1;
        this.Slope = 0;
        this.StdDev = 0;
        this.ResidualStdError = double.NaN;
        this.YIntercept = 0;
        this.DegreesOfFreedom = 0;

        if (xs.Length != ys.Length || xs.Length < MinimumLength)
        {
            return;
        }

        // First pass to calculate base Linear Regression parameters
        double sumX = 0;
        double sumY = 0;
        double sumXsq = 0;
        double sumYsq = 0;
        double codeviates = 0;
        int count = xs.Length;

        for (int i = 0; i < count; i++)
        {
            double x = xs[i];
            double y = ys[i];
            codeviates += x * y;
            sumX += x;
            sumY += y;
            sumXsq += x * x;
            sumYsq += y * y;
        }

        double averageX = sumX / count;
        double averageY = sumY / count;

        // Centered sums of squares (Sxx, Syy) and cross-products (Sxy).
        double sumSquaresX = sumXsq - (averageX * averageX * count);
        // Rounding can push Syy slightly below zero for (near-)constant ys; clamp so Sqrt never yields NaN.
        double sumSquaresY = Math.Max(0, sumYsq - (averageY * averageY * count));
        double correlation = codeviates - (averageX * averageY * count);

        double rNumerator = (count * codeviates) - (sumX * sumY);
        double rDenominator = ((count * sumXsq) - (sumX * sumX)) * ((count * sumYsq) - (sumY * sumY));
        double r = rNumerator / Math.Sqrt(rDenominator);

        this.Average = averageY;
        this.R2 = r * r;
        this.Slope = correlation / sumSquaresX;
        this.StdDev = Math.Sqrt(sumSquaresY / (count - 1));
        this.YIntercept = averageY - (this.Slope * averageX);
        this.DegreesOfFreedom = count - 2;

        // Residual sum of squares in closed form: SSE = Syy - Slope * Sxy.
        // Clamped at zero because cancellation can make it marginally negative on a perfect fit.
        double residualSumSq = Math.Max(0, sumSquaresY - (this.Slope * correlation));
        this.ResidualStdError = Math.Sqrt(residualSumSq / this.DegreesOfFreedom);

        // Second pass to recalculate the residual error from the individual residuals. Can be skipped.
        if (!useSecondPass)
        {
            return;
        }

        double sumDistanceSq = 0;

        for (int i = 0; i < count; i++)
        {
            double yPredict = (xs[i] * this.Slope) + this.YIntercept;
            double distance = ys[i] - yPredict;
            sumDistanceSq += distance * distance;
        }

        this.ResidualStdError = Math.Sqrt(sumDistanceSq / this.DegreesOfFreedom);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment