Created
March 30, 2019 02:44
Ruby script to calculate a coefficent of determination for simple linear regression.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/ruby | |
#********************************************************* | |
# Ruby script to calculate a coefficient of determination. | |
#********************************************************* | |
# | |
class CoefficientOfDetermination | |
# 説明変数と目的変数 | |
X = [107, 336, 233, 82, 61, 378, 129, 313, 142, 428] | |
Y = [286, 851, 589, 389, 158, 1037, 463, 563, 372, 1020] | |
# Execution | |
def exec | |
puts "説明変数 X = {#{X.join(', ')}}" | |
puts "目的変数 Y = {#{Y.join(', ')}}" | |
puts "---" | |
# 単回帰直線算出(切片と傾き) | |
reg_line = X.reg_line(Y) | |
puts " 切片 a = %20.16f" % reg_line[:intercept] | |
puts " 傾き b = %20.16f" % reg_line[:slope] | |
# 相関係数 | |
r = X.r(Y) | |
puts "相関係数 r = %20.16f" % r | |
# 推定値 | |
y_e = calc_estimations(X, Y, reg_line[:intercept], reg_line[:slope]) | |
# 標本値 Y (目的変数)の平均 | |
y_b = Y.inject(0) { |s, a| s += a } / Y.size.to_f | |
puts "決定係数" | |
# 解法-1. 決定係数 (= 推定値の変動 / 標本値の変動) | |
r_2 = calc_s_r(y_b, y_e) / calc_s_y2(y_b, Y) | |
puts " R2 (1) = %20.16f" % r_2 | |
# 解法-2. 決定係数 (= 1 - 残差の変動 / 標本値の変動) | |
r_2 = 1.0 - calc_s_e(Y, y_e) / calc_s_y2(y_b, Y) | |
puts " R2 (2) = %20.16f" % r_2 | |
# 解法-3. 決定係数 (公式使用(解法-1,2の変形)) | |
r_2 = calc_r_2(X, Y, reg_line[:slope]) | |
puts " R2 (3) = %20.16f" % r_2 | |
# 解法-4. 決定係数 (= 相関係数の2乗) | |
r_2 = r * r | |
puts " R2 (4) = %20.16f" % r_2 | |
rescue => e | |
$stderr.puts "[#{e.class}] #{e.message}" | |
e.backtrace.each{ |tr| $stderr.puts "\t#{tr}" } | |
exit 1 | |
end | |
private | |
# 推定値 | |
# | |
# @param xs: 説明変数配列 | |
# @param y: 目的変数配列 | |
# @param a: 回帰直線の切片 | |
# @param b: 回帰直線の傾き | |
# @return y_e: 推定値配列 | |
def calc_estimations(xs, y, a, b) | |
y_e = Array.new | |
begin | |
xs.each { |x| y_e << a + b * x } | |
return y_e | |
rescue => e | |
raise | |
end | |
end | |
# 推定値の変動 | |
# | |
# @param y_b: 標本値(目的変数)の平均 | |
# @param y_e: 推定値配列 | |
# @return s_r: 推定値の変動 | |
def calc_s_r(y_b, y_e) | |
s_r = 0.0 | |
begin | |
y_e.each do |a| | |
v = a - y_b | |
s_r += v * v | |
end | |
return s_r | |
rescue => e | |
raise | |
end | |
end | |
# 標本値の変動 | |
# | |
# @param y_b: 標本値(目的変数)の平均 | |
# @param y_s: 標本値(目的変数)配列 | |
# @return s_y2: 標本値の変動 | |
def calc_s_y2(y_b, y_s) | |
s_y2 = 0.0 | |
begin | |
y_s.each do |a| | |
v = a - y_b | |
s_y2 += v * v | |
end | |
return s_y2 | |
rescue => e | |
raise | |
end | |
end | |
# 残差の変動 | |
# | |
# @param y_s: 標本値(目的変数)配列 | |
# @param y_e: 推定値配列 | |
# @return s_e: 残差の変動 | |
def calc_s_e(y_s, y_e) | |
s_e = 0.0 | |
begin | |
y_s.zip(y_e).each do |a, b| | |
v = a - b | |
s_e += v * v | |
end | |
return s_e | |
rescue => e | |
raise | |
end | |
end | |
# 決定係数 (公式使用) | |
# | |
# @param x: 説明変数配列 | |
# @param y: 目的変数配列 | |
# @param b: 回帰直線の傾き | |
# @return r_2: 残差の変動 | |
def calc_r_2(x, y, b) | |
n = x.size | |
sum_x = x.inject(0.0) { |s, a| s += a } | |
sum_y = y.inject(0.0) { |s, a| s += a } | |
sum_y2 = y.inject(0.0) { |s, a| s += a * a } | |
sum_xy = x.zip(y).inject(0.0) { |s, a| s += a[0] * a[1] } | |
s_y2 = sum_y2 - sum_y * sum_y / n.to_f | |
s_xy = sum_xy - sum_x * sum_y / n.to_f | |
s_r = b * s_xy | |
return s_r / s_y2 | |
rescue => e | |
raise | |
end | |
end | |
class Array | |
# 単回帰直線 | |
def reg_line(y) | |
# 以下の場合は例外スロー | |
# - 引数の配列が Array クラスでない | |
# - 自身配列が空 | |
# - 配列サイズが異なれば例外 | |
raise "Argument is not a Array class!" unless y.class == Array | |
raise "Self array is nil!" if self.size == 0 | |
raise "Argument array size is invalid!" unless self.size == y.size | |
# x の総和 | |
sum_x = self.inject(0) { |s, a| s += a } | |
# y の総和 | |
sum_y = y.inject(0) { |s, a| s += a } | |
# x^2 の総和 | |
sum_xx = self.inject(0) { |s, a| s += a * a } | |
# x * y の総和 | |
sum_xy = self.zip(y).inject(0) { |s, a| s += a[0] * a[1] } | |
# 切片 a | |
a = sum_xx * sum_y - sum_xy * sum_x | |
a /= (self.size * sum_xx - sum_x * sum_x).to_f | |
# 傾き b | |
b = self.size * sum_xy - sum_x * sum_y | |
b /= (self.size * sum_xx - sum_x * sum_x).to_f | |
{intercept: a, slope: b} | |
end | |
# 相関係数 | |
def r(y) | |
# 以下の場合は例外スロー | |
# - 引数の配列が Array クラスでない | |
# - 自身配列が空 | |
# - 配列サイズが異なれば例外 | |
raise "Argument is not a Array class!" unless y.class == Array | |
raise "Self array is nil!" if self.size == 0 | |
raise "Argument array size is invalid!" unless self.size == y.size | |
# x の相加平均, y の相加平均 (arithmetic mean) | |
mean_x = self.inject(0) { |s, a| s += a } / self.size.to_f | |
mean_y = y.inject(0) { |s, a| s += a } / y.size.to_f | |
# x と y の共分散の分子 (covariance) | |
cov = self.zip(y).inject(0) { |s, a| s += (a[0] - mean_x) * (a[1] - mean_y) } | |
# x の分散の分子, y の分散の分子 (variance) | |
var_x = self.inject(0) { |s, a| s += (a - mean_x) ** 2 } | |
var_y = y.inject(0) { |s, a| s += (a - mean_y) ** 2 } | |
# 相関係数 (correlation coefficient) | |
r = cov / Math.sqrt(var_x) | |
r /= Math.sqrt(var_y) | |
end | |
end | |
CoefficientOfDetermination.new.exec if __FILE__ == $0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment