Skip to content

Instantly share code, notes, and snippets.

@mandrewstuart
Last active December 29, 2021 21:44
Show Gist options
  • Save mandrewstuart/e1c584a36ca5394cc934542731b4d8c2 to your computer and use it in GitHub Desktop.
Save mandrewstuart/e1c584a36ca5394cc934542731b4d8c2 to your computer and use it in GitHub Desktop.
Modified Xi correlation, implementation of Chatterjee's new coefficient for nonlinear relationships (including sinusoidal)
# https://arxiv.org/pdf/1909.10140.pdf
from random import shuffle
from math import sin
def xicor_integrated(X, Y):
    """Compute Chatterjee's xi coefficient of Y on X, plus a modified variant.

    The Y values are reordered by their paired X value, each Y is replaced
    by its rank, and the coefficient is derived from how much the rank
    jumps between X-neighbours (small jumps mean Y is close to a function
    of X, whether or not that function is linear or monotone).

    Args:
        X: Indexable sequence of mutually comparable values.
        Y: Indexable sequence of mutually comparable values, at least as
            long as X. Elements beyond ``len(X)`` are ignored.

    Returns:
        A dict with three keys:

        * ``'ordered'``: ``1 - 3 * sum|r[i+1] - r[i]| / (n**2 - 1)``, the
          no-ties form of the coefficient from the referenced paper.
        * ``'unordered'``: the original author's empirical variant,
          ``1 + n * sum|r[i+1] - r[i]| / (4 * D)`` with
          ``D = sum(l[i] * (1 + i - l[i]))``. NOTE: this is not the
          with-ties formula from the paper; it is kept unchanged so that
          existing results stay reproducible.
        * ``'length'``: number of (x, y) pairs used.

        A coefficient is ``float('nan')`` when it is undefined: both are
        NaN for fewer than two pairs, and ``'unordered'`` is NaN whenever
        ``D`` is zero (for example when Y is strictly decreasing in X).

    Raises:
        IndexError: If Y is shorter than X.

    Ranks are computed with binary search on a sorted copy of Y, which
    assumes the Y values are totally ordered (no NaN among them).
    """
    # Function-scope import so the block does not depend on edits to the
    # file-level import section.
    from bisect import bisect_left, bisect_right

    # Order Y by X. Pairs compare as (x, y), so ties in X are broken by Y.
    # Y is indexed (instead of zipped) so a too-short Y raises IndexError
    # rather than being silently truncated.
    pairs = sorted((X[i], Y[i]) for i in range(len(X)))
    ys = [y for _, y in pairs]
    n = len(ys)

    # One sort + binary search per element replaces the O(n^2) rescans.
    sorted_ys = sorted(ys)
    # r[i]: how many Y values are <= ys[i].
    top_ranks = [bisect_right(sorted_ys, y) for y in ys]
    # l[i]: how many Y values are >= ys[i].
    bottom_ranks = [n - bisect_left(sorted_ys, y) for y in ys]

    # Sum of absolute rank jumps between consecutive X-neighbours.
    numerator = sum(abs(b - a) for a, b in zip(top_ranks, top_ranks[1:]))

    # Denominator of the modified variant; note it uses the position in
    # X-order (1 + index), not n, and can therefore be zero or negative.
    denominator = sum(
        count * (1 + index - count)
        for index, count in enumerate(bottom_ranks)
    )

    nan = float('nan')
    if n < 2:
        # No neighbouring pairs exist, so neither coefficient is defined.
        ordered = unordered = nan
    else:
        ordered = 1 - (3*numerator)/(n**2 - 1)
        # Guard the division: the modified denominator vanishes for some
        # inputs, where the original code raised ZeroDivisionError.
        unordered = 1 + (n*numerator)/(4*denominator) if denominator else nan

    return {
        'ordered': ordered,
        'unordered': unordered,
        'length': n,
    }
# Demo input: x is the integers 0..length-1 and y is a sine wave over the
# same steps. Swap in other functions or random data to experiment.
length = 10000
x = list(range(length))
y = [sin(step / 100) for step in range(length)]
print(xicor_integrated(x, y))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment