Created
December 23, 2020 14:21
-
-
Save hugo1005/f4d5c40194fd8e8e52a64dc1153191ae to your computer and use it in GitHub Desktop.
Games-Howell Test (Multiple Comparisons)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import combinations, product

import numpy as np
import numpy_indexed as npi
import pandas as pd
import statsmodels
from scipy.stats import f_oneway
from statsmodels.stats.libqsturng import qsturng, psturng

# Post-COVID returns for each significance category, with missing values
# removed so that f_oneway receives clean samples.
# NOTE(review): the label 'insignficant' is kept byte-for-byte because it has
# to match the values stored in the 'significance' column -- confirm the
# spelling against the data before "fixing" it.
a, b, c, d = (
    all_metrics.loc[all_metrics['significance'] == label, 'post_covid_return'].dropna().values
    for label in ('insignficant', 'intrinsic_only', 'sharpe_only', 'both')
)

# One-way ANOVA across the four groups (the original author observed a
# significant difference between groups here).
print(f_oneway(a, b, c, d))
def games_howell(data=None, dv=None, between=None, effsize='hedges', alpha=0.05):
    """Run Games-Howell pairwise comparisons between all groups.

    Every pair of groups defined by ``between`` is compared on ``dv`` using
    Welch-type standard errors, Welch-Satterthwaite degrees of freedom and the
    studentized range distribution (``psturng`` / ``qsturng``).

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding at least the ``dv`` and ``between`` columns.
    dv : column label
        Column with the numeric dependent variable.
    between : column label
        Column with the group label of each row.
    effsize : str, optional
        Accepted for interface compatibility; it is not used by the
        computation.
    alpha : float, optional
        Significance level of the confidence limits (default 0.05). The
        quantile ``1 - alpha`` is passed to ``qsturng``.

    Returns
    -------
    pandas.DataFrame
        One row per pair of groups with the columns ``groups``,
        ``mean_difference``, ``std_error``, ``t_value``, ``p_value``,
        ``upper_limit`` and ``lower limit``.

    Raises
    ------
    ValueError
        If any group has fewer than two non-missing observations, because
        its sample variance (and the degrees of freedom) would be undefined.
    """
    # Read the two columns by name rather than by position after
    # reset_index(), which breaks when the frame carries a MultiIndex.
    values = data[dv].to_numpy(dtype=float)
    labels = data[between].to_numpy()

    # Rows without a value or without a group label cannot contribute; left
    # in place, a single NaN would turn the whole group mean into NaN.
    keep = ~(np.isnan(values) | pd.isna(labels))
    samples = pd.Series(values[keep]).groupby(labels[keep])

    n_obs = samples.count()
    if (n_obs < 2).any():
        too_small = [str(label) for label in n_obs.index[n_obs < 2]]
        raise ValueError(
            'games_howell needs at least two observations per group; '
            'too few in: ' + ', '.join(too_small)
        )

    group_labels = list(n_obs.index)  # groupby returns the labels sorted
    k = len(group_labels)

    # Keep NumPy scalars (not Python floats) so that a zero denominator
    # yields inf/nan instead of raising ZeroDivisionError.
    group_means = dict(zip(group_labels, samples.mean().to_numpy()))
    group_sizes = dict(zip(group_labels, n_obs.to_numpy()))
    # Squared standard error of each group mean, s_i^2 / n_i, using the
    # unbiased sample variance (ddof=1) as the Games-Howell procedure requires.
    var_over_n = dict(zip(group_labels, (samples.var(ddof=1) / n_obs).to_numpy()))

    columns = ['groups', 'mean_difference', 'std_error', 't_value',
               'p_value', 'upper_limit', 'lower limit']
    rows = []
    for first, second in combinations(group_labels, 2):
        diff = group_means[second] - group_means[first]
        pooled = var_over_n[first] + var_over_n[second]

        # Welch t statistic of the pair.
        t_val = np.abs(diff) / np.sqrt(pooled)

        # Welch-Satterthwaite degrees of freedom.
        df = pooled ** 2 / (
            var_over_n[first] ** 2 / (group_sizes[first] - 1)
            + var_over_n[second] ** 2 / (group_sizes[second] - 1)
        )

        # The studentized range statistic is q = t * sqrt(2).
        p_val = psturng(t_val * np.sqrt(2), k, df)

        # Standard error on the studentized-range scale.
        se = np.sqrt(0.5 * pooled)

        # Half-width of the interval: critical value times the standard
        # error (the critical value alone is not on the scale of the data).
        margin = qsturng(1 - alpha, k, df) * se

        rows.append({
            'groups': str(first) + ' : ' + str(second),
            'mean_difference': diff,
            'std_error': se,
            't_value': t_val,
            'p_value': p_val,
            'upper_limit': diff + margin,
            # Key spelled with a space, as before, for backward compatibility.
            'lower limit': diff - margin,
        })

    # Passing ``columns`` keeps the header even when there are no pairs.
    return pd.DataFrame(rows, columns=columns)
# Pairwise Games-Howell comparisons of post-COVID returns across the
# significance categories.
games_howell(data=all_metrics, dv='post_covid_return', between='significance')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment