Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Games-Howell Test (Multiple Comparisons)
from scipy.stats import f_oneway
# Pull the non-missing 'post_covid_return' values for each of the four
# 'significance' labels into its own NumPy array (a, b, c, d in that order).
# NOTE(review): 'insignficant' is spelled exactly as in the original filter;
# presumably it matches the label stored in the data -- confirm before changing.
a, b, c, d = (
    all_metrics.loc[all_metrics['significance'] == label, 'post_covid_return'].dropna().values
    for label in ('insignficant', 'intrinsic_only', 'sharpe_only', 'both')
)
import statsmodels
# One-way ANOVA across the four groups; prints the result returned by f_oneway.
# Author's note on their data: significant difference between groups.
print(f_oneway(a, b, c, d))
from itertools import combinations, product
import numpy_indexed as npi
from statsmodels.stats.libqsturng import qsturng, psturng
def games_howell(data=None, dv=None, between=None, effsize='hedges', alpha=0.05):
    """Run Games-Howell pairwise post-hoc comparisons between groups.

    Every pair of groups is compared with a Welch-type statistic (unequal
    variances, unequal sample sizes) that is referred to the studentized
    range distribution with Welch-Satterthwaite degrees of freedom.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding at least the columns named by ``dv`` and ``between``.
    dv : str
        Name of the numeric column containing the measured values.
    between : str
        Name of the column containing the group labels.
    effsize : str, optional
        Accepted for call compatibility only; it is not used by the
        computation.
    alpha : float, optional
        Significance level of the confidence limits (default 0.05, the value
        that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        One row per unordered pair of groups (labels in sorted order) with
        the columns ``groups``, ``mean_difference`` (second minus first),
        ``std_error``, ``t_value`` (absolute value), ``p_value``,
        ``upper_limit`` and ``lower limit``. With fewer than two groups the
        frame has these columns and no rows.

    Raises
    ------
    ValueError
        If any group has fewer than two non-missing observations, because
        its sample variance and the degrees of freedom are then undefined.

    Notes
    -----
    Rows with a missing value in either column are ignored.
    """
    # Work on the two named columns directly instead of positional slices of
    # ``reset_index().values``; this keeps numeric dtype and does not depend
    # on how many index levels the frame has.
    subset = data[[dv, between]].dropna()
    values = subset[dv].astype(float)
    grouped = values.groupby(subset[between], observed=True)

    group_obs = grouped.count()
    group_means = grouped.mean()
    # Sample variance (ddof=1): the Welch statistic and the (n - 1) terms of
    # the Welch-Satterthwaite equation are defined for the unbiased estimate.
    group_variance = grouped.var(ddof=1)

    undersized = [str(label) for label in group_obs.index[group_obs < 2]]
    if undersized:
        raise ValueError(
            'games_howell needs at least two observations per group; '
            'too few in: {}'.format(', '.join(undersized))
        )

    k = len(group_obs)
    # Squared standard error of each group mean (s_i^2 / n_i), reused below.
    var_over_n = group_variance / group_obs

    rows = []
    for first, second in combinations(list(group_obs.index), 2):
        # Mean difference of the pair (second group minus first group).
        diff = group_means.loc[second] - group_means.loc[first]

        pooled = var_over_n.loc[first] + var_over_n.loc[second]

        # Welch t statistic (absolute value).
        t_val = np.abs(diff) / np.sqrt(pooled)

        # Welch-Satterthwaite degrees of freedom.
        df_num = pooled ** 2
        df_denom = (var_over_n.loc[first] ** 2 / (group_obs.loc[first] - 1) +
                    var_over_n.loc[second] ** 2 / (group_obs.loc[second] - 1))
        df = df_num / df_denom

        # p-value from the studentized range distribution; q = t * sqrt(2).
        p_val = np.asarray(psturng(t_val * np.sqrt(2), k, df)).item()

        # Standard error on the studentized-range scale, so |diff| / se == q.
        se = np.sqrt(0.5 * pooled)

        # The critical value is in standard-error units and must be scaled by
        # ``se`` before it is added to the mean difference.
        q_crit = np.asarray(qsturng(1 - alpha, k, df)).item()
        margin = q_crit * se

        rows.append((str(first) + ' : ' + str(second),
                     diff, se, t_val, p_val, diff + margin, diff - margin))

    # Column names are kept exactly as before (including the space in
    # 'lower limit') so existing callers keep working.
    result_df = pd.DataFrame(rows, columns=['groups',
                                            'mean_difference',
                                            'std_error',
                                            't_value',
                                            'p_value',
                                            'upper_limit',
                                            'lower limit'])
    return result_df
# Pairwise Games-Howell comparisons of 'post_covid_return' across the
# 'significance' groups of all_metrics.
games_howell(data=all_metrics, dv='post_covid_return', between='significance')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment