Last active
June 19, 2018 17:38
-
-
Save c650/18a960b3f32e306915ef4091fa65f573 to your computer and use it in GitHub Desktop.
Takes CSV. Calculates pairwise correlations (excludes empty cells)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
# correlation-inator.rb
# by Charles (c650)
# Takes in a CSV and calculates pairwise correlations, excluding empty cells
# One pair of numeric observations parsed from two CSV cells.
#
# Any '#' characters in a cell are removed before the text is converted with
# String#to_f, so text that is not numeric becomes 0.0 rather than raising.
class Pair
  attr_accessor :a, :b

  # @param a [String] raw text of the first cell
  # @param b [String] raw text of the second cell
  def initialize(a, b)
    @a = parse_cell(a)
    @b = parse_cell(b)
  end

  private

  # Converts a raw cell to a Float without touching the caller's string.
  # String#delete returns a new string, so frozen strings are accepted and
  # the source data is left exactly as it was read.
  def parse_cell(text)
    text.delete('#').to_f
  end
end
# Value left in matrix cells that are never computed. Only the diagonal keeps
# it, because correlations are calculated for distinct row pairs only.
UNCOMPUTED_CELL = 1000

# Builds the Pair objects for the columns two CSV rows have in common.
#
# Column 0 (the row label) is skipped. A column is dropped when either row has
# no value there; zip pads a shorter row_b with nil and truncates a longer
# one, so only columns present in both rows survive.
#
# @param row_a [Array<String, nil>] one CSV row
# @param row_b [Array<String, nil>] another CSV row
# @return [Array<Pair>]
def paired_cells(row_a, row_b)
  row_a.drop(1).zip(row_b.drop(1)).each_with_object([]) do |(x, y), pairs|
    pairs << Pair.new(x, y) unless x.nil? || y.nil?
  end
end

# Pearson correlation coefficient of a list of (a, b) observations.
#
# @param pairs [Array<#a, #b>] objects exposing numeric `a` and `b`
# @return [Float] the coefficient; NaN when there are no observations or
#   when either side has zero variance
def pearson_correlation(pairs)
  # Without this guard the averages below would divide by zero.
  return Float::NAN if pairs.empty?

  count = pairs.length
  x_avg = pairs.sum(0.0, &:a) / count
  y_avg = pairs.sum(0.0, &:b) / count

  covariance = 0.0
  x_spread = 0.0
  y_spread = 0.0
  pairs.each do |pair|
    dx = pair.a - x_avg
    dy = pair.b - y_avg
    covariance += dx * dy
    x_spread += dx**2
    y_spread += dy**2
  end

  # Float division: a zero denominator yields NaN instead of raising.
  covariance / Math.sqrt(x_spread * y_spread)
end

# Computes the symmetric matrix of pairwise row correlations.
#
# @param rows [Array<Array<String, nil>>] data rows, label in column 0
# @return [Array<Array<Numeric>>] square matrix; the diagonal holds
#   UNCOMPUTED_CELL
def correlation_matrix(rows)
  size = rows.length
  matrix = Array.new(size) { Array.new(size, UNCOMPUTED_CELL) }

  (0...size).to_a.combination(2) do |i, j|
    matrix[i][j] = matrix[j][i] = pearson_correlation(paired_cells(rows[i], rows[j]))
  end

  matrix
end

# Writes the matrix as CSV: a header line of names, then one line per row
# consisting of that row's name followed by its correlations.
#
# @param path [String] output file path
# @param point_names [Array<String, nil>] header cells; index 0 is the
#   blank corner cell, so row i is labelled with point_names[i + 1]
# @param correl_matrix [Array<Array<Numeric>>] matrix from correlation_matrix
def write_correlation_csv(path, point_names, correl_matrix)
  CSV.open(path, "wb") do |csv|
    csv << point_names
    correl_matrix.each_with_index do |row, i|
      csv << [point_names[i + 1], *row]
    end
  end
end

# Script entry point: reads the CSV named by the first argument and writes
# the pairwise correlation matrix of its rows to ./out.csv.
#
# @param argv [Array<String>] command-line arguments
def main(argv)
  if argv.empty?
    puts "Usage: ./doit.rb [path to csv]"
    return
  end

  # The first line of the file is skipped (presumably a header line).
  rows = CSV.read(argv[0]).drop(1)
  point_names = [nil] + rows.map(&:first)

  write_correlation_csv("./out.csv", point_names, correlation_matrix(rows))
end

main(ARGV)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment