Created
May 29, 2021 12:02
-
-
Save doraneko94/fa39139f721300f50533a69ceb8b6112 to your computer and use it in GitHub Desktop.
01-Kendall rank correlation coefficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def tau_num(X: np.ndarray, Y: np.ndarray): | |
XpY = X + Y | |
XmY = X - Y | |
A = (XpY == 2).sum() | |
S = (XpY == 0).sum() | |
dX = (XmY == 1).sum() | |
dY = (XmY == -1).sum() | |
return A * S - dX * dY | |
def tau_den(X: np.ndarray, Y: np.ndarray): | |
N = len(X) | |
X_sum = X.sum() | |
Y_sum = Y.sum() | |
n0 = N * (N - 1) / 2 | |
n1 = (X_sum * (X_sum - 1) + (N - X_sum) * (N - X_sum - 1)) / 2 | |
n2 = (Y_sum * (Y_sum - 1) + (N - Y_sum) * (N - Y_sum - 1)) / 2 | |
return np.sqrt(n0 - n1) * np.sqrt(n0 - n2) | |
def zb_den(X: np.ndarray, Y: np.ndarray): | |
N = len(X) | |
NN1 = N * (N - 1) | |
X_sum = X.sum() | |
Y_sum = Y.sum() | |
ti = [N - X_sum, X_sum] | |
ui = [N - Y_sum, Y_sum] | |
titi1 = [ti[0] * (ti[0] - 1), ti[1] * (ti[1] - 1)] | |
uiui1 = [ui[0] * (ui[0] - 1), ui[1] * (ui[1] - 1)] | |
v0 = NN1 * (2 * N + 5) | |
vt = titi1[0] * (2 * ti[0] + 5) + titi1[1] * (2 * ti[1] + 5) | |
vu = uiui1[0] * (2 * ui[0] + 5) + uiui1[1] * (2 * ui[1] + 5) | |
v1 = (titi1[0] + titi1[1]) * (uiui1[0] + uiui1[1]) / (2 * NN1) | |
v2 = (titi1[0] * (ti[0] - 2) + titi1[1] * (ti[1] - 2)) * (uiui1[0] * (ui[0] - 2) + uiui1[1] * (ui[1] - 2)) / (9 * NN1 * (N - 2)) | |
return (v0 - vt - vu) / 18 + v1 + v2 | |
def tau(X: np.ndarray, Y: np.ndarray): | |
return tau_num(X, Y) / tau_den(X, Y) | |
def zb(X: np.ndarray, Y: np.ndarray): | |
return tau_num(X, Y) / np.sqrt(zb_den(X, Y)) | |
if __name__ == "__main__": | |
from matplotlib import pyplot as plt | |
from scipy.stats import norm | |
zb_list = [] | |
for i in range(10000): | |
r = np.random.random(size=(2, 100)) | |
x = np.zeros((2, 100)) | |
x[r > 0.3] = 1 | |
zb_list.append(zb(x[0], x[1])) | |
bin_data = plt.hist(zb_list, bins=50) | |
x_axis = bin_data[1] | |
db = x_axis[1] - x_axis[0] | |
plt.plot(x_axis, norm.pdf(x_axis) * db * 10000, label="N(0, 1)") | |
plt.xlabel("z_B") | |
plt.title("Distribution of z_B") | |
plt.legend() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment