{{ message }}

Instantly share code, notes, and snippets.

# hugo1005/Games_Howell.py

Created Dec 23, 2020
Games-Howell Test (Multiple Comparisons)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 # One-way ANOVA followed by a Games-Howell post-hoc comparison of group means.
from itertools import combinations, product

import numpy as np
import numpy_indexed as npi
import pandas as pd
import statsmodels
from scipy.stats import f_oneway
from statsmodels.stats.libqsturng import qsturng, psturng

# Split the response by group label, dropping missing values, for the ANOVA.
# NOTE(review): 'insignficant' is presumably the spelling stored in the data;
# confirm against `all_metrics` before correcting it.
a = all_metrics[all_metrics['significance'] == 'insignficant']['post_covid_return'].dropna().values
b = all_metrics[all_metrics['significance'] == 'intrinsic_only']['post_covid_return'].dropna().values
c = all_metrics[all_metrics['significance'] == 'sharpe_only']['post_covid_return'].dropna().values
d = all_metrics[all_metrics['significance'] == 'both']['post_covid_return'].dropna().values

print(f_oneway(a, b, c, d))  # Significant difference between groups


def games_howell(data=None, dv=None, between=None, effsize='hedges', alpha=0.05):
    """Run Games-Howell pairwise comparisons between the groups of a DataFrame.

    Every pair of groups is compared with a Welch-type statistic (unequal
    variances, unequal sample sizes) that is referred to the studentized range
    distribution.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding at least the columns named by ``dv`` and ``between``.
    dv : str
        Name of the numeric response column.
    between : str
        Name of the column holding the group labels.
    effsize : str, optional
        Accepted for interface compatibility; not used by the computation.
    alpha : float, optional
        Significance level of the confidence limits (default 0.05). It is
        passed to ``qsturng`` as ``1 - alpha``.

    Returns
    -------
    pandas.DataFrame
        One row per pair of groups with the columns ``groups``
        (``"<first> : <second>"``), ``mean_difference`` (second minus first),
        ``std_error``, ``t_value`` (absolute value), ``p_value``,
        ``upper_limit`` and ``lower limit``. The frame is empty when fewer
        than two groups are present.

    Raises
    ------
    ValueError
        If any group has fewer than two non-missing observations, because its
        sample variance is then undefined.
    """
    # Rows with a missing response or label cannot contribute to any group.
    observations = data[[dv, between]].dropna()
    # pandas' ``var`` is the sample variance (ddof=1), which is what the
    # Welch-Satterthwaite degrees of freedom and the standard error require.
    summary = (
        observations
        .groupby(between, observed=True)[dv]
        .agg(['mean', 'var', 'count'])
    )

    undersized = summary.index[summary['count'] < 2].tolist()
    if undersized:
        raise ValueError(
            'Games-Howell needs at least two observations per group; '
            'too few in: ' + ', '.join(str(label) for label in undersized)
        )

    k = len(summary)
    means = summary['mean']
    sizes = summary['count']
    # s_i^2 / n_i, the per-group term shared by every formula below.
    var_over_n = summary['var'] / sizes

    rows = []
    for first, second in combinations(summary.index, 2):
        diff = means.loc[second] - means.loc[first]
        pooled = var_over_n.loc[first] + var_over_n.loc[second]

        # Welch t statistic of the pair.
        t_val = np.abs(diff) / np.sqrt(pooled)

        # Welch-Satterthwaite degrees of freedom.
        df = pooled ** 2 / (
            var_over_n.loc[first] ** 2 / (sizes.loc[first] - 1)
            + var_over_n.loc[second] ** 2 / (sizes.loc[second] - 1)
        )

        # The studentized range statistic is q = t * sqrt(2).
        p_val = psturng(t_val * np.sqrt(2), k, df)

        # Standard error on the studentized-range scale, so that q = |diff| / se.
        se = np.sqrt(0.5 * pooled)

        # Half-width of the interval: critical q value scaled by the standard
        # error (the critical value alone is not in the units of ``diff``).
        margin = qsturng(1 - alpha, k, df) * se

        rows.append({
            'groups': str(first) + ' : ' + str(second),
            'mean_difference': diff,
            'std_error': se,
            't_value': t_val,
            'p_value': p_val,
            'upper_limit': diff + margin,
            # Column name kept as-is (with a space) for existing callers.
            'lower limit': diff - margin,
        })

    return pd.DataFrame(
        rows,
        columns=['groups', 'mean_difference', 'std_error', 't_value',
                 'p_value', 'upper_limit', 'lower limit'],
    )


games_howell(all_metrics, dv='post_covid_return', between='significance')