Skip to content

Instantly share code, notes, and snippets.

@lukasbischof
Last active December 2, 2020 22:23
Show Gist options
  • Save lukasbischof/bf8596825b5af03343df0e99dde4921d to your computer and use it in GitHub Desktop.
Save lukasbischof/bf8596825b5af03343df0e99dde4921d to your computer and use it in GitHub Desktop.
Multivariate statistics
# frozen_string_literal: true
# rubocop:disable Lint/MissingCopEnableDirective
# rubocop:disable Layout/LineLength
require 'json'
# Sample observations as [x, y] pairs; each pair is split into its x and y
# component by get_x / get_y further down.
input = [[9, 2], [4, 4], [7, 3], [9, 7], [0, 7]] # Change this as you need
# Extracts the first component of every pair.
#
# @param input [Array<Array>] list of [x, y] pairs
# @return [Array] the x values, in input order
def get_x(input)
  input.map do |pair|
    first_component, _second_component = pair
    first_component
  end
end
# Extracts the second component of every pair.
#
# @param input [Array<Array>] list of [x, y] pairs
# @return [Array] the y values, in input order
def get_y(input)
  input.each_with_object([]) do |pair, ys|
    _first_component, second_component = pair
    ys << second_component
  end
end
def rank_array(array)
python = <<~PYTHON
python -c "from scipy.stats import rankdata; print(list(rankdata(#{array})))"
PYTHON
JSON.parse(`#{python}`).map(&:to_r).map { |r| r.denominator == 1 ? r.to_i : r }
end
# Formats a list of ranks for printing, e.g. "[9/2,2,3]".
# Rational ranks are written as "numerator/denominator"; every other value is
# rendered through its default string conversion.
#
# @param rank [Array<Integer, Rational>] ranks to display
# @return [String] comma-separated ranks wrapped in square brackets
def display_ranking(rank)
  cells = rank.map do |entry|
    case entry
    when Rational then "#{entry.numerator}/#{entry.denominator}"
    else entry
    end
  end
  "[#{cells.join(',')}]"
end
# Arithmetic mean of the values in +arr+, computed as (1 / n) * sum.
#
# @param arr [Array<Numeric>] values to average
# @return [Float] the mean (NaN for an empty array, since 1.0 / 0 * 0 is NaN)
def mean(arr)
  weight = 1.0 / arr.length
  weight * arr.sum
end
# Population covariance of a list of [x, y] pairs: the average (divided by n,
# not n - 1) of the products of the deviations from the two means.
#
# @param arr [Array<Array<Numeric>>] list of [x, y] pairs
# @param x_mean [Numeric, nil] mean of the x values; computed from +arr+ when omitted
# @param y_mean [Numeric, nil] mean of the y values; computed from +arr+ when omitted
# @return [Float] the covariance
def covariance(arr, x_mean = nil, y_mean = nil)
  x_mean = mean(get_x(arr)) unless x_mean
  y_mean = mean(get_y(arr)) unless y_mean

  weight = 1.0 / arr.length
  deviation_products = arr.map { |(x, y)| (x - x_mean) * (y - y_mean) }
  weight * deviation_products.sum
end
# Population standard deviation of +arr+: the square root of the average
# (divided by n, not n - 1) squared deviation from the mean.
#
# @param arr [Array<Numeric>] values
# @return [Float] the standard deviation
def standard_deviation(arr)
  center = mean(arr)
  squared_deviations = arr.map { |value| (value - center)**2 }
  variance = (1.0 / arr.length) * squared_deviations.sum
  Math.sqrt(variance)
end
# Correlation coefficient from a covariance and the two standard deviations:
# covariance / (sx * sy).
#
# @param covariance [Numeric] covariance of the two variables
# @param sx [Numeric] standard deviation of the first variable
# @param sy [Numeric] standard deviation of the second variable
# @return [Numeric] the quotient (Float when any argument is a Float)
def pearson(covariance, sx, sy)
  spread = sx * sy
  covariance / spread
end
# Process input data
x = get_x(input)
y = get_y(input)
x_sorted = x.sort
y_sorted = y.sort
x_rank = rank_array(x)
y_rank = rank_array(y)

# Classify correlation.
# The covariances are kept at full precision here and rounded only when they
# are printed; feeding a covariance that was already rounded to two decimals
# into pearson() would distort the resulting correlation coefficients.
input_covariance = covariance(input)
sx = standard_deviation(x)
sy = standard_deviation(y)
rank_covariance = covariance(x_rank.zip(y_rank))
pearson_coefficient = pearson(input_covariance, sx, sy)
# Spearman's coefficient is Pearson's formula applied to the rank lists.
spearman_coefficient = pearson(rank_covariance, standard_deviation(x_rank), standard_deviation(y_rank))

# Output evaluation (the report labels are German and are printed verbatim)
report = [
  'Eingabeevaluation: ',
  '-' * 30,
  "X: #{x}",
  "Y: #{y}",
  "Sortierte X: #{x_sorted}",
  "Sortierte Y: #{y_sorted}",
  "Rangliste x: #{display_ranking(x_rank)}",
  "Rangliste y: #{display_ranking(y_rank)}",
  '',
  'Auswertung: ',
  '-' * 30,
  "Kovarianz der Eingabe: #{input_covariance.round(2)}",
  "Sx (Standardabweichung): #{sx}",
  "Sy (Standardabweichung): #{sy}",
  "Pearson: #{pearson_coefficient.round(2)}",
  "Kovarianz der Ranglisten: #{rank_covariance.round(2)}",
  "Spearman Korrelationskoeffizient: #{spearman_coefficient.round(2)}"
]
# Kernel#puts prints each array element on its own line.
puts report
@lukasbischof
Copy link
Author

Requirements:

  • Ruby (obviously)
  • Python executable in PATH
  • scipy installed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment