Last active
December 29, 2021 21:44
-
-
Save mandrewstuart/e1c584a36ca5394cc934542731b4d8c2 to your computer and use it in GitHub Desktop.
Modified Xi correlation, implementation of Chatterjee's new coefficient for nonlinear relationships (including sinusoidal)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reference: S. Chatterjee, "A New Coefficient of Correlation", https://arxiv.org/pdf/1909.10140.pdf
from random import shuffle | |
from math import sin | |
def xicor_integrated(X, Y):
    """Compute Chatterjee's xi correlation of Y on X, plus a modified variant.

    Y is reordered by ascending X, each Y value is ranked, and the summed
    absolute difference between consecutive ranks measures how "smoothly" Y
    follows X.

    Args:
        X: Indexable sequence of mutually comparable values.
        Y: Indexable sequence of mutually comparable values. It is read at
            indices ``0 .. len(X) - 1``; a shorter Y raises ``IndexError``
            and any extra trailing values are ignored.

    Returns:
        A dict with three keys:

        ``'ordered'``
            ``1 - 3 * S / (n**2 - 1)``, where ``S`` is the sum of absolute
            differences of consecutive ranks. NaN when ``n < 2``.
        ``'unordered'``
            ``1 + n * S / (4 * D)``, where ``D`` is the sum of
            ``l_i * (1 + i - l_i)`` and ``l_i`` is the number of Y values
            ``>= Y[i]``. NaN when ``D == 0``, which happens for ``n < 2``
            and whenever Y is strictly decreasing in X.
        ``'length'``
            The number of (X, Y) pairs used.

    NOTE(review): the ``'unordered'`` expression is the original author's
    empirical modification. It is not the tie-aware estimator from the paper,
    which I recall as ``1 - n * S / (2 * sum(l_i * (n - l_i)))`` -- confirm
    against the reference before relying on it.
    """
    # Function-scope import keeps this block self-contained.
    from bisect import bisect_left, bisect_right

    # Reorder Y by ascending X. Pairs compare lexicographically, so ties in X
    # are broken by the Y value.
    pairs = sorted((X[i], Y[i]) for i in range(len(X)))
    y_by_x = [y for _, y in pairs]
    n = len(y_by_x)

    # One sort plus binary searches yields the same counts as rescanning all
    # of Y for every element, in O(n log n) instead of O(n^2).
    sorted_y = sorted(y_by_x)
    # top_ranks[i]: how many Y values are <= y_by_x[i].
    top_ranks = [bisect_right(sorted_y, y) for y in y_by_x]
    # bottom_ranks[i]: how many Y values are >= y_by_x[i].
    bottom_ranks = [n - bisect_left(sorted_y, y) for y in y_by_x]

    # Sum of absolute differences between consecutive ranks.
    numerator = sum(
        abs(after - before)
        for before, after in zip(top_ranks, top_ranks[1:])
    )
    # Position-weighted product used by the modified coefficient.
    denominator = sum(
        count * (1 + index - count)
        for index, count in enumerate(bottom_ranks)
    )

    undefined = float('nan')

    # n**2 - 1 is zero for n == 1 and the statistic is meaningless for n == 0.
    if n > 1:
        ordered = 1 - (3 * numerator) / (n**2 - 1)
    else:
        ordered = undefined

    # The denominator is exactly zero for strictly decreasing Y (and for
    # n < 2); report NaN rather than raising ZeroDivisionError so that the
    # still-valid 'ordered' value reaches the caller.
    if denominator:
        unordered = 1 + (n * numerator) / (4 * denominator)
    else:
        unordered = undefined

    return {
        'ordered': ordered,
        'unordered': unordered,
        'length': n,
    }
# Demo input: a sine wave sampled at 10000 integer points. Swap in another
# function (or random values) for y to experiment with the coefficient.
length = 10000
x = list(range(length))
y = [sin(step / 100) for step in x]
print(xicor_integrated(x, y))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment