Last active
December 2, 2020 22:23
-
-
Save lukasbischof/bf8596825b5af03343df0e99dde4921d to your computer and use it in GitHub Desktop.
Multivariate statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true
# rubocop:disable Lint/MissingCopEnableDirective
# rubocop:disable Layout/LineLength
require 'json' | |
# Sample observations as [x, y] pairs. Change this as you need.
input = [
  [9, 2],
  [4, 4],
  [7, 3],
  [9, 7],
  [0, 7]
]
# Extracts the first component of every observation.
#
# @param input [Array<Array>] list of [x, y] pairs
# @return [Array] the x values, in input order
def get_x(input)
  input.map { |(x, _y)| x }
end
# Extracts the second component of every observation.
#
# @param input [Array<Array>] list of [x, y] pairs
# @return [Array] the y values, in input order
def get_y(input)
  input.map { |(_x, y)| y }
end
# Assigns 1-based ranks to the values of +array+; tied values share the
# average of the positions they occupy in sorted order (the same result as
# SciPy's `rankdata` with its default "average" method).
#
# Implemented in plain Ruby so the script does not depend on a `python`
# executable with SciPy being available on PATH, nor on how that Python
# happens to print its numbers.
#
# @param array [Array<Numeric>] values to rank
# @return [Array<Integer, Rational>] rank of each value, in input order;
#   whole ranks are Integers, half ranks (from ties) are Rationals
def rank_array(array)
  # Indices of the input, ordered by the value they point at.
  order = array.each_index.sort_by { |i| array[i] }
  ranks = Array.new(array.length)
  taken = 0 # number of sorted positions already handed out

  order.chunk_while { |a, b| array[a] == array[b] }.each do |tied|
    # Positions taken + 1 .. taken + tied.length are shared by this run;
    # their average is (first + last) / 2.
    average = Rational(2 * taken + tied.length + 1, 2)
    rank = average.denominator == 1 ? average.to_i : average
    tied.each { |i| ranks[i] = rank }
    taken += tied.length
  end

  ranks
end
# Formats a ranking for display, e.g. "[9/2,2,3]".
#
# @param rank [Array<Integer, Rational>] ranks as produced by rank_array
# @return [String] comma-separated ranks in square brackets; Rationals are
#   rendered as "numerator/denominator"
def display_ranking(rank)
  cells = rank.map do |r|
    r.is_a?(Rational) ? "#{r.numerator}/#{r.denominator}" : r
  end
  "[#{cells.join(',')}]"
end
# Arithmetic mean of a list of numbers.
#
# Divides the sum once instead of multiplying by the reciprocal of the
# length: `(1.0 / n) * sum` rounds twice and can be off by one ulp
# (49 ones would average to 0.9999999999999999).
#
# @param arr [Array<Numeric>] values to average
# @return [Float] the mean; NaN for an empty array
def mean(arr)
  arr.sum.fdiv(arr.length)
end
# Population covariance of a list of [x, y] pairs (divides by n, not n - 1).
#
# The sum of cross products is divided once by the length; multiplying by
# `1.0 / n` instead rounds twice and loses precision.
#
# @param arr [Array<Array<Numeric>>] list of [x, y] pairs
# @param x_mean [Numeric, nil] mean of the x values; computed when nil
# @param y_mean [Numeric, nil] mean of the y values; computed when nil
# @return [Float] the covariance; NaN for an empty list
def covariance(arr, x_mean = nil, y_mean = nil)
  x_mean ||= mean(get_x(arr))
  y_mean ||= mean(get_y(arr))
  cross_products = arr.sum { |(x, y)| (x - x_mean) * (y - y_mean) }
  cross_products.fdiv(arr.length)
end
# Population standard deviation (divides by n, not n - 1).
#
# @param arr [Array<Numeric>] values to measure
# @return [Float] square root of the mean squared deviation from the mean;
#   NaN for an empty array
def standard_deviation(arr)
  # Named `avg` so the local does not shadow the `mean` method it calls.
  avg = mean(arr)
  squared_deviations = arr.sum { |x| (x - avg)**2 }
  Math.sqrt(squared_deviations.fdiv(arr.length))
end
# Pearson correlation coefficient from a covariance and the two standard
# deviations.
#
# Uses float division so Integer arguments are not truncated
# (`1 / (2 * 2)` would be 0).
#
# @param covariance [Numeric] covariance of the two variables
# @param sx [Numeric] standard deviation of the first variable
# @param sy [Numeric] standard deviation of the second variable
# @return [Float] covariance / (sx * sy)
def pearson(covariance, sx, sy)
  covariance.fdiv(sx * sy)
end
# Process input data
x = get_x(input)
y = get_y(input)
x_sorted = x.sort
y_sorted = y.sort
x_rank = rank_array(x)
y_rank = rank_array(y)

# Classify correlation
# Covariances are kept at full precision here and rounded only when printed,
# so the correlation coefficients are not computed from pre-rounded inputs.
input_covariance = covariance(input)
sx = standard_deviation(x)
sy = standard_deviation(y)
rank_covariance = covariance(x_rank.zip(y_rank))
pearson_coefficient = pearson(input_covariance, sx, sy)
# Spearman's coefficient is Pearson's coefficient applied to the ranks.
spearman_coefficient = pearson(rank_covariance, standard_deviation(x_rank), standard_deviation(y_rank))

# Output evaluation
report = [
  'Eingabeevaluation: ',
  '-' * 30,
  "X: #{x}",
  "Y: #{y}",
  "Sortierte X: #{x_sorted}",
  "Sortierte Y: #{y_sorted}",
  "Rangliste x: #{display_ranking(x_rank)}",
  "Rangliste y: #{display_ranking(y_rank)}",
  '',
  'Auswertung: ',
  '-' * 30,
  "Kovarianz der Eingabe: #{input_covariance.round(2)}",
  "Sx (Standardabweichung): #{sx}",
  "Sy (Standardabweichung): #{sy}",
  "Pearson: #{pearson_coefficient.round(2)}",
  "Kovarianz der Ranglisten: #{rank_covariance.round(2)}",
  "Spearman Korrelationskoeffizient: #{spearman_coefficient.round(2)}"
]
puts report
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Requirements:
PATH