Created
January 28, 2020 02:56
-
-
Save komasaru/9d58ff0d987ee8751d9a4fc5efeb3bde to your computer and use it in GitHub Desktop.
Ruby script to calculate Kendall's rank correlation coefficient.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/ruby
class Array
  # Computes Kendall's rank correlation coefficient (tau-b) between this
  # array and +y+.
  #
  # tau-b = (P - Q) / sqrt((n0 - n1) * (n0 - n2)) where
  #   P  = number of concordant pairs,
  #   Q  = number of discordant pairs,
  #   n0 = n * (n - 1) / 2 (total number of pairs),
  #   n1 = number of pairs tied in self,
  #   n2 = number of pairs tied in y.
  #
  # @param y [Array<Numeric>] second sample; must have the same size as self
  # @return [Float] coefficient in [-1.0, 1.0], or NaN when the coefficient is
  #   undefined (every pair is tied in self or in y, e.g. a constant array
  #   or a single-element array)
  # @raise [RuntimeError] if +y+ is not an Array, if self is empty, if the
  #   sizes differ, or if any element is not a finite real number
  def rcc_kendall(y)
    raise "Argument is not a Array class!" unless y.is_a?(Array)
    raise "Self array is nil!" if empty?
    raise "Argument array size is invalid!" unless size == y.size
    # Check the type itself instead of pattern-matching the string form, so
    # values such as "ABC1" are rejected here rather than failing later with
    # an unrelated comparison error. NaN and Infinity are rejected as well.
    unless (self + y).all? { |v| v.is_a?(Numeric) && v.real? && v.finite? }
      raise "Items except numerical values exist!"
    end

    concordant = 0
    discordant = 0
    tied_pairs_x = 0
    tied_pairs_y = 0

    # Visit every unordered pair (i, j) with i < j exactly once.
    each_index do |i|
      ((i + 1)...size).each do |j|
        dx = self[i] <=> self[j]
        dy = y[i] <=> y[j]
        # Counting tied pairs directly yields sum(t * (t - 1) / 2) over all
        # tie groups, which is the tau-b correction term.
        tied_pairs_x += 1 if dx.zero?
        tied_pairs_y += 1 if dy.zero?

        direction = dx * dy
        if direction > 0
          concordant += 1
        elsif direction < 0
          discordant += 1
        end
      end
    end

    total_pairs = size * (size - 1) / 2
    denominator = Math.sqrt((total_pairs - tied_pairs_x) * (total_pairs - tied_pairs_y))
    # No untied pairs in at least one sample: the coefficient is undefined.
    return Float::NAN if denominator.zero?

    (concordant - discordant) / denominator
  end
end
# Demo driver: prints both samples and their Kendall rank correlation
# coefficient. Enable exactly one X/Y pair below.

# Example without ties (tied ranks):
#X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#Y = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]

# Example with ties (tied ranks):
X = [1, 2, 3, 4, 5, 5, 7, 8, 9, 10]
Y = [1, 3, 5, 6, 9, 2, 4, 6, 8, 10]

# Example with arrays of different sizes:
#X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#Y = [1, 3, 5, 7, 9, 2, 4, 6, 8]

# Example where X is empty:
#X = []
#Y = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]

# Example containing a non-numeric item:
#X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#Y = [1, 3, 5, 7, 9, "ABC", 4, 6, 8, 10]

puts " X = #{X}"
puts " Y = #{Y}"
# Computed only after the inputs have been echoed, so the inputs are still
# shown if the computation raises.
coefficient = X.rcc_kendall(Y)
puts " Kendall's RCC = #{coefficient}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment