Skip to content

Instantly share code, notes, and snippets.

Created January 12, 2013 04:04
Show Gist options
  • Save anonymous/4515990 to your computer and use it in GitHub Desktop.
Save anonymous/4515990 to your computer and use it in GitHub Desktop.
/* Linear regression program that reads four data sets from
c:\egr111\temperaturedata.txt and determines which data set has the most linear
relationship. Prints a detailed analysis to c:\egr111\results.txt and announces
the most linear data set on-screen, along with its r-squared value. */
#include<cmath>
#include<cstdlib>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<limits>
using namespace std;
//Forward declarations for the helpers defined after main().
//Note: the first dimension written in an array parameter is not part of the
//parameter's type (the parameter is really a pointer), so the [3] bounds below
//are not enforced; writeit() and calcit() read elements 0 through 3.

//Fills the 11 x 5 table from c:/egr111/temperaturedata.txt.
void readit(float[11][5]);
//Writes the report to c:/egr111/results.txt and prints the best data set.
//Arguments: data table, r-squared values, slopes, intercepts.
void writeit(float[11][5], float[3], float[3], float[3]);
//Returns the index of the largest r-squared value.
int calcit(float[3]);
//Runs the regression for one data set. Arguments: zero-based data set number,
//then slope, intercept and goodness of fit (outputs), then the data table.
void regression(int, float&, float&, float&, float[11][5]);
//Program entry point: loads the data table, runs a linear regression on each
//of the four resistance data sets and hands the results to writeit().
int main()
{
    //Four data sets (R1..R4) are analysed, so every result array needs four
    //slots; three-element arrays would be overrun by the loop below.
    const int datasets = 4;
    float slope[datasets], intercept[datasets], goodnessoffit[datasets];
    float tempdata[11][5];

    readit(tempdata);

    //Run the regression on every data set. forcolumn tells regression() which
    //data set (table columns 1-4) to process; column 0 holds the temperatures.
    //regression() reports the correlation coefficient, which is squared here
    //so that goodnessoffit[] holds r-squared values.
    for (int forcolumn = 0; forcolumn < datasets; forcolumn++)
    {
        float masterslope = 0, masterintercept = 0, mastergoodnessoffit = 0;
        regression(forcolumn, masterslope, masterintercept,
                   mastergoodnessoffit, tempdata);
        slope[forcolumn] = masterslope;
        intercept[forcolumn] = masterintercept;
        goodnessoffit[forcolumn] = mastergoodnessoffit * mastergoodnessoffit;
    }

    writeit(tempdata, goodnessoffit, slope, intercept);
    std::system("PAUSE"); //keeps the console window open (Windows shell command)
    return 0;
}
//Loads the data table from the file temperaturedata.txt in c:/egr111.
//
//The first line of the file is a header and is discarded regardless of its
//length. The following 55 whitespace-separated numbers fill tempdata row by
//row (11 rows of 5 columns).
//
//If the file cannot be opened, or does not contain 55 readable numbers, a
//message is printed, the console is paused and the program terminates with a
//failure status, so the caller never sees a partially filled table.
void readit(float tempdata[11][5])
{
    std::ifstream indata("c:\\egr111\\temperaturedata.txt");
    if (!indata)
    {
        std::cout << "There is no data file, temperaturedata.txt" << std::endl;
        std::system("PAUSE");
        std::exit(EXIT_FAILURE);
    }

    //Skip the header line. ignore() has no length limit, unlike reading the
    //line into a fixed-size buffer, which puts the stream into a failed state
    //when the line does not fit.
    indata.ignore(std::numeric_limits<std::streamsize>::max(), '\n');

    for (int row = 0; row < 11; row++)
        for (int column = 0; column < 5; column++)
            if (!(indata >> tempdata[row][column]))
            {
                std::cout << "The data file, temperaturedata.txt, is incomplete"
                          << " or malformed" << std::endl;
                std::system("PAUSE");
                std::exit(EXIT_FAILURE);
            }
    //indata is closed by its destructor.
}
//Reports the analysis: prints the most linear data set on-screen and writes
//the data table plus the per-data-set results to results.txt in c:/egr111.
//
//tempdata       11 x 5 table; column 0 is temperature, columns 1-4 are R1-R4.
//goodnessoffit  r-squared value of each of the four data sets.
//slope          fitted slope of each of the four data sets.
//intercept      fitted y-intercept of each of the four data sets.
//
//All three result arrays are read at indices 0 through 3. If the results file
//cannot be opened, a message is printed and nothing is written.
void writeit(float tempdata[11][5], float goodnessoffit[4], float slope[4],
             float intercept[4])
{
    const int bestfit = calcit(goodnessoffit); //column # of most accurate data
    std::cout << "The most linear data set is R" << bestfit + 1
              << " with an rsquared value of "
              << goodnessoffit[bestfit] << ".\n\n";

    std::ofstream outdata("c:\\egr111\\results.txt");
    if (!outdata)
    {
        std::cout << "Unable to open the results file, results.txt"
                  << std::endl;
        return;
    }

    outdata << " T(F) R1 R2 R3 R4\n";

    //These manipulators are sticky, so setting them once covers every row.
    outdata << std::right << std::fixed << std::setprecision(1);

    //The first five rows and the remaining six format the temperature column
    //differently: a literal leading space versus a field width of 4.
    for (int row = 0; row < 5; row++)
        outdata << " " << tempdata[row][0]
                << std::setw(9) << tempdata[row][1] << " " << tempdata[row][2]
                << " " << tempdata[row][3] << " " << tempdata[row][4]
                << '\n';
    for (int row = 5; row < 11; row++)
        outdata << std::setw(4) << tempdata[row][0]
                << std::setw(9) << tempdata[row][1] << " " << tempdata[row][2]
                << " " << tempdata[row][3] << " " << tempdata[row][4]
                << '\n';

    //Summary rows: one value per data set, each with its own precision.
    outdata << std::setprecision(3) << '\n' << "slope"
            << std::setw(9) << slope[0] << std::setw(9) << slope[1]
            << std::setw(9) << slope[2] << std::setw(9) << slope[3];
    outdata << std::setprecision(2) << '\n' << "intcpt"
            << std::setw(8) << intercept[0] << std::setw(9) << intercept[1]
            << std::setw(9) << intercept[2] << std::setw(9) << intercept[3];
    outdata << std::setprecision(3) << '\n' << "rsqrd"
            << std::setw(9) << goodnessoffit[0]
            << std::setw(9) << goodnessoffit[1]
            << std::setw(9) << goodnessoffit[2]
            << std::setw(9) << goodnessoffit[3] << '\n' << '\n';

    outdata << std::setprecision(7) << "The most linear data set is R"
            << bestfit + 1 << " with an\nrsquared value of "
            << goodnessoffit[bestfit] << "\n\n";
    //outdata is flushed and closed by its destructor.
}
//calcit determines which of the four rsquared values is greatest and returns
//an integer (0-3) corresponding to its location in goodnessoffit[].
//
//Elements 0 through 3 are examined. On a tie the lowest index wins. If no
//element is greater than zero (for example every value is 0 or NaN), index 0
//is returned, so the result is always a valid index.
int calcit(float goodnessoffit[4])
{
    float biggest = 0;
    int bestfit = 0; //defined fallback when nothing exceeds zero
    for (int counter = 0; counter < 4; counter++)
    {
        if (goodnessoffit[counter] > biggest)
        {
            biggest = goodnessoffit[counter];
            bestfit = counter;
        }
    }
    return bestfit;
}
//Modular function that handles all regression calculations for one data set.
//x is temperature (table column 0), y is resistance (column forcolumn + 1).
//
//forcolumn            zero-based data set number; must be 0-3.
//masterslope          receives the least-squares slope.
//masterintercept      receives the least-squares y-intercept.
//mastergoodnessoffit  receives the correlation coefficient r (the caller
//                     squares it to obtain r-squared).
//tempdata             11 x 5 data table.
//
//With n = 11 data points and deviations dx = x - average(x),
//dy = y - average(y):
//  slope         = sum of(dx*dy) / sum of(dx^2)
//  intercept     = average(y) - slope*average(x)
//  goodnessoffit = sum of(dx*dy) / sqrt(sum of(dx^2) * sum of(dy^2))
//These are algebraically the same as the raw-sum least-squares formulas, but
//subtracting the averages first (and accumulating in double) avoids taking
//the difference of two large, nearly equal sums, which loses most of the
//significant digits in single precision.
//
//Degenerate data: if every x is identical the slope is reported as 0 and the
//intercept as average(y); if x or y has no variation the goodness of fit is
//reported as 0 instead of dividing zero by zero.
void regression(int forcolumn, float& masterslope, float& masterintercept,
                float& mastergoodnessoffit, float tempdata[11][5])
{
    const int points = 11;
    const int ycolumn = forcolumn + 1;

    //First pass: averages of x and y.
    double avex = 0, avey = 0;
    for (int row = 0; row < points; row++)
    {
        avex += tempdata[row][0];
        avey += tempdata[row][ycolumn];
    }
    avex /= points;
    avey /= points;

    //Second pass: sums of squared and cross deviations from the averages.
    double sumdxdx = 0, sumdydy = 0, sumdxdy = 0;
    for (int row = 0; row < points; row++)
    {
        const double dx = tempdata[row][0] - avex;
        const double dy = tempdata[row][ycolumn] - avey;
        sumdxdx += dx * dx;
        sumdydy += dy * dy;
        sumdxdy += dx * dy;
    }

    const double slope = (sumdxdx > 0) ? sumdxdy / sumdxdx : 0.0;
    const double fit = (sumdxdx > 0 && sumdydy > 0)
                           ? sumdxdy / std::sqrt(sumdxdx * sumdydy)
                           : 0.0;

    masterslope = static_cast<float>(slope);
    masterintercept = static_cast<float>(avey - slope * avex);
    mastergoodnessoffit = static_cast<float>(fit);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment