Last active
July 31, 2020 01:25
-
-
Save komasaru/3efe686080f8a11ceed884bc81c063ff to your computer and use it in GitHub Desktop.
C++ source code to calculate a coefficent of determination for simple linear regression.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "calc.hpp" | |
#include <cmath> | |
#include <iostream> | |
#include <sstream> | |
#include <vector> | |
/** | |
* @brief 単回帰直線の計算 | |
* | |
* @param[ref] 切片 a (double) | |
* @param[ref] 傾き b (double) | |
* @param[ref] 相関係数 r (double) | |
* @return 真偽 (bool) | |
* @retval true 成功 | |
* @retval false 失敗 | |
*/ | |
bool Calc::reg_line(double& a, double& b, double& r) { | |
unsigned int i; // loop インデックス | |
double s_x = 0.0; // sum(x) | |
double s_y = 0.0; // sum(y) | |
double s_xx = 0.0; // sum(xx) | |
double s_xy = 0.0; // sum(xy) | |
double m_x = 0.0; // x の相加平均 | |
double m_y = 0.0; // y の相加平均 | |
double cov = 0.0; // x と y の共分散 | |
double v_x = 0.0; // x の分散 | |
double v_y = 0.0; // y の分散 | |
try { | |
// sum(x), sum(y), sum(xx), sum(xy) | |
for (i = 0; i < cnt; i++) { | |
s_x += data[i][0]; | |
s_y += data[i][1]; | |
s_xx += data[i][0] * data[i][0]; | |
s_xy += data[i][0] * data[i][1]; | |
} | |
// 行列1行目 | |
mtx.push_back({(double)cnt, s_x, s_y}); | |
// 行列2行目 | |
mtx.push_back({s_x, s_xx, s_xy}); | |
// 計算(ガウスの消去法) | |
if (!solve_ge(mtx)) { | |
std::cout << "[ERROR] Failed to solve by the Gauss-Ellimination method!" | |
<< std::endl; | |
return false; | |
} | |
// 切片、傾き | |
a = mtx[0][2]; | |
b = mtx[1][2]; | |
// 相関係数 | |
m_x = s_x / cnt; | |
m_y = s_y / cnt; | |
for (i = 0; i < cnt; i++) { | |
cov += (data[i][0] - m_x) * (data[i][1] - m_y); | |
v_x += (data[i][0] - m_x) * (data[i][0] - m_x); | |
v_y += (data[i][1] - m_y) * (data[i][1] - m_y); | |
} | |
r = (cov / std::sqrt(v_x)) / std::sqrt(v_y); | |
} catch (...) { | |
return false; // 計算失敗 | |
} | |
return true; // 計算成功 | |
} | |
/** | |
* @brief ガウスの消去法 | |
* | |
* @param[ref] 行列(配列) mtx (double) | |
* @return 真偽 (bool) | |
* @retval true 成功 | |
* @retval false 失敗 | |
*/ | |
bool Calc::solve_ge(std::vector<std::vector<double>>& mtx) { | |
int i; // loop インデックス | |
int j; // loop インデックス | |
int k; // loop インデックス | |
int n; // 元(行)の数 | |
double d; // 計算用 | |
try { | |
n = (int)mtx.size(); | |
// 前進消去 | |
for (k = 0; k < n - 1; k++) { | |
for (i = k + 1; i < n; i++) { | |
d = mtx[i][k] / mtx[k][k]; | |
for (j = k + 1; j <= n; j++) | |
mtx[i][j] -= mtx[k][j] * d; | |
} | |
} | |
// 後退代入 | |
for (i = n - 1; i >= 0; i--) { | |
d = mtx[i][n]; | |
for (j = i + 1; j < n; j++) | |
d -= mtx[i][j] * mtx[j][n]; | |
mtx[i][n] = d / mtx[i][i]; | |
} | |
} catch (...) { | |
return false; // 計算失敗 | |
} | |
return true; // 計算成功 | |
} | |
/** | |
* @brief 計算・推定値 | |
* (y_e = a + b * x) | |
* | |
* @param 切片 a (double) | |
* @param 傾き b (double) | |
* @param[ref] 推定値 y_e (vector<double>) | |
* @return 真偽 (bool) | |
* @retval true 成功 | |
* @retval false 失敗 | |
*/ | |
bool Calc::y_e( | |
double a, double b, | |
std::vector<double>& y_e | |
) { | |
unsigned int i; // loop インデックス | |
try { | |
for (i = 0; i < cnt; i++) | |
y_e.push_back(a + b * data[i][0]); | |
} catch (...) { | |
return false; // 計算失敗 | |
} | |
return true; // 計算成功 | |
} | |
/** | |
* @brief 計算・標本値 Y (目的変数)の平均 | |
* (y_b = sum(y) / size(y)) | |
* | |
* @return 標本値 Y (目的変数)の平均 (double) | |
*/ | |
double Calc::y_b() { | |
unsigned int i; // loop インデックス | |
double s = 0.0; | |
try { | |
for (i = 0; i < cnt; i++) | |
s += data[i][1]; | |
return s / cnt; | |
} catch (...) { | |
throw; | |
} | |
} | |
/** | |
* @brief 計算・推定値の変動 | |
* (sum((y_e - y_b)**2)) | |
* | |
* @param 推定値 y_e (vector<double>) | |
* @param 標本値 Y の平均 y_b (double) | |
* @return 推定値の変動 (double) | |
*/ | |
double Calc::s_r(std::vector<double> y_e, double y_b) { | |
unsigned int i; // loop インデックス | |
double v = 0.0; | |
double s = 0.0; | |
try { | |
for (i = 0; i < cnt; i++) { | |
v = y_e[i] - y_b; | |
s += v * v; | |
} | |
return s; | |
} catch (...) { | |
throw; | |
} | |
} | |
/** | |
* @brief 計算・標本値の変動 | |
* (sum((y - y_b)**2)) | |
* | |
* @param 標本値 Y の平均 y_b (double) | |
* @return 標本値の変動 (double) | |
*/ | |
double Calc::s_y2(double y_b) { | |
unsigned int i; // loop インデックス | |
double v = 0.0; | |
double s = 0.0; | |
try { | |
for (i = 0; i < cnt; i++) { | |
v = data[i][1] - y_b; | |
s += v * v; | |
} | |
return s; | |
} catch (...) { | |
throw; | |
} | |
} | |
/** | |
* @brief 計算・残渣の変動 | |
* (sum((y - y_e)**2)) | |
* | |
* @param 推定値 y_b (vector<double>) | |
* @return 残渣の変動 (double) | |
*/ | |
double Calc::s_e(std::vector<double> y_e) { | |
unsigned int i; // loop インデックス | |
double v = 0.0; | |
double s = 0.0; | |
try { | |
for (i = 0; i < cnt; i++) { | |
v = data[i][1] - y_e[i]; | |
s += v * v; | |
} | |
return s; | |
} catch (...) { | |
throw; | |
} | |
} | |
/** | |
* @brief 計算・決定係数(公式使用) | |
* | |
* @param 回帰直線の傾き b (double) | |
* @return 決定係数 (double) | |
*/ | |
double Calc::r_2(double b) { | |
unsigned int i; // loop インデックス | |
double s_x = 0.0; | |
double s_y = 0.0; | |
double s_y2 = 0.0; | |
double s_xy = 0.0; | |
try { | |
for (i = 0; i < cnt; i++) { | |
s_x += data[i][0]; | |
s_y += data[i][1]; | |
s_y2 += data[i][1] * data[i][1]; | |
s_xy += data[i][0] * data[i][1]; | |
} | |
return b * (s_xy - s_x * s_y / cnt) / (s_y2 - s_y * s_y / cnt); | |
} catch (...) { | |
throw; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef COEFFICIENT_OF_DETERMINATION_LINE_CALC_HPP_ | |
#define COEFFICIENT_OF_DETERMINATION_LINE_CALC_HPP_ | |
#include <vector> | |
class Calc { | |
std::vector<std::vector<double>> data; // 元データ | |
std::vector<std::vector<double>> mtx; // 計算用行列 | |
bool solve_ge(std::vector<std::vector<double>>&); // ガウスの消去法 | |
public: | |
Calc(std::vector<std::vector<double>>& data) : data(data) { | |
cnt = data.size(); | |
} | |
unsigned int cnt; // データ件数 | |
bool reg_line(double&, double&, double&); // 単回帰直線の計算 | |
bool y_e(double, double, std::vector<double>&); // 計算・推定値 | |
double y_b(); // 計算・標本値 Y (目的変数)の平均 | |
double s_r(std::vector<double>, double); // 計算・推定値の変動 | |
double s_y2(double); // 計算・標本値の変動 | |
double s_e(std::vector<double>); // 計算・残渣の変動 | |
double r_2(double); // 計算・決定係数(公式使用) | |
}; | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*********************************************************** | |
単回帰分析(線形回帰)決定係数計算 | |
: y = a + b * x | |
: 連立方程式をガウスの消去法で解く方法 | |
DATE AUTHOR VERSION | |
2020.06.22 mk-mode.com 1.00 新規作成 | |
Copyright(C) 2020 mk-mode.com All Rights Reserved. | |
***********************************************************/ | |
#include "calc.hpp" | |
#include "file.hpp" | |
#include <cstdlib> // for EXIT_XXXX | |
#include <iomanip> // for setprecision | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
int main(int argc, char* argv[]) { | |
std::string f_data; // データファイル名 | |
std::vector<std::vector<double>> data; // データ配列 | |
std::size_t i; // loop インデックス | |
double a; // 切片 | |
double b; // 傾き | |
double r; // 相関係数 | |
std::vector<double> y_e; // 推定値 | |
double y_b; // 標本値 Y (目的変数)の平均 | |
double s_r; // 推定値の変動 | |
double s_y2; // 標本値の変動 | |
double s_e; // 残差の変動 | |
try { | |
// === コマンドライン引数のチェック | |
if (argc != 2) { | |
std::cerr << "[ERROR] Number of arguments is wrong!\n" | |
<< "[USAGE] ./coefficient_of_determination_line <file_name>" | |
<< std::endl; | |
return EXIT_FAILURE; | |
} | |
// === ファイル名取得 | |
f_data = argv[1]; | |
// === データ取得 | |
File file(f_data); | |
if (!file.get_text(data)) { | |
std::cout << "[ERROR] Failed to read the file!" << std::endl; | |
return EXIT_FAILURE; | |
} | |
std::cout << std::fixed << std::setprecision(4); | |
std::cout << "説明変数 X 目的変数 Y" << std::endl; | |
for (i = 0; i < data.size(); i++) | |
std::cout << std::setw(10) << std::right << data[i][0] | |
<< " " | |
<< std::setw(10) << std::right << data[i][1] | |
<< std::endl; | |
// === 計算(線形回帰) | |
Calc calc(data); | |
if (!calc.reg_line(a, b, r)) { | |
std::cout << "[ERROR] Failed to calculate!" << std::endl; | |
return EXIT_FAILURE; | |
} | |
std::cout << std::fixed << std::setprecision(8); | |
std::cout << "---\n" | |
<< " 切片 a = " << std::setw(16) << std::right << a | |
<< "\n" | |
<< " 傾き b = " << std::setw(16) << std::right << b | |
<< "\n" | |
<< "相関係数 r = " << std::setw(16) << std::right << r | |
<< std::endl; | |
// === 計算(決定係数) | |
// 推定値 | |
if (!calc.y_e(a, b, y_e)) { | |
std::cout << "[ERROR] Failed to calculate!" << std::endl; | |
return EXIT_FAILURE; | |
} | |
// 標本値 Y (目的変数)の平均 | |
y_b = calc.y_b(); | |
// 推定値の変動 | |
s_r = calc.s_r(y_e, y_b); | |
// 標本値の変動 | |
s_y2 = calc.s_y2(y_b); | |
// 残差の変動 | |
s_e = calc.s_e(y_e); | |
std::cout << "---" << std::endl; | |
// 解法-1. 決定係数 (= 推定値の変動 / 標本値の変動) | |
std::cout << " R2 (1) = " << std::setprecision(16) << std::right | |
<< s_r / s_y2 << std::endl; | |
// 解法-2. 決定係数 (= 1 - 残差の変動 / 標本値の変動) | |
std::cout << " R2 (2) = " << std::setprecision(16) << std::right | |
<< 1.0 - s_e / s_y2 << std::endl; | |
// 解法-3. 決定係数 (公式使用(解法-1,2の変形)) | |
std::cout << " R2 (3) = " << std::setprecision(16) << std::right | |
<< calc.r_2(b) << std::endl; | |
// 解法-4. 決定係数 (= 相関係数の2乗) | |
std::cout << " R2 (4) = " << std::setprecision(16) << std::right | |
<< r * r << std::endl; | |
} catch (...) { | |
std::cerr << "EXCEPTION!" << std::endl; | |
return EXIT_FAILURE; | |
} | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "file.hpp" | |
#include <iostream> | |
#include <sstream> | |
#include <string> | |
#include <vector> | |
bool File::get_text(std::vector<std::vector<double>>& data) { | |
try { | |
// ファイル OPEN | |
std::ifstream ifs(f_data); | |
if (!ifs.is_open()) return false; // 読み込み失敗 | |
// ファイル READ | |
std::string buf; // 1行分バッファ | |
while (getline(ifs, buf)) { | |
std::vector<double> rec; // 1行分ベクタ | |
std::istringstream iss(buf); // 文字列ストリーム | |
std::string field; // 1列分文字列 | |
// 1行分文字列を1行分ベクタに追加 | |
double x, y; | |
while (iss >> x >> y) { | |
rec.push_back(x); | |
rec.push_back(y); | |
} | |
// 1行分ベクタを data ベクタに追加 | |
if (rec.size() != 0) data.push_back(rec); | |
} | |
} catch (...) { | |
std::cerr << "EXCEPTION!" << std::endl; | |
return false; | |
} | |
return true; // 読み込み成功 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef COEFFICIENT_OF_DETERMINATION_LINE_FILE_HPP_ | |
#define COEFFICIENT_OF_DETERMINATION_LINE_FILE_HPP_ | |
#include <fstream> | |
#include <string> | |
#include <vector> | |
class File { | |
std::string f_data; | |
public: | |
File(std::string f_data) : f_data(f_data) {} | |
bool get_text(std::vector<std::vector<double>>&); | |
}; | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment