Skip to content

Instantly share code, notes, and snippets.

@SamarDeen
Last active September 2, 2022 14:41
Show Gist options
  • Save SamarDeen/df32145a233e47dfcf5430abc592f985 to your computer and use it in GitHub Desktop.
Save SamarDeen/df32145a233e47dfcf5430abc592f985 to your computer and use it in GitHub Desktop.
import statsmodels.stats.proportion
from statsmodels.stats.proportion import proportions_ztest
# Build counter: give both samples a constant 'Count' column of 1s, so that
# summing 'Count' inside a group yields the number of rows in that group.
for _sample in (df_test, df_train):
    _sample['Count'] = 1
# Get proportion for test data.
# One row per distinct 'Person_drift' value; dropna=False keeps missing values
# as their own group instead of silently dropping those rows.
# The aggregation is requested by name ('sum'): handing np.sum to agg() is
# deprecated in recent pandas releases and emits a FutureWarning.
df_test_Person = (
    df_test.groupby(['Person_drift'], dropna=False, as_index=False)
    .agg({'Count': 'sum', 'Target_drift': 'sum'})
    # Rename explicitly so the test and train tables share column names
    # without relying on positional column order.
    .rename(columns={'Person_drift': 'Person', 'Target_drift': 'Target'})
)
# Ratio: summed target divided by the row count of the group, rounded to 3 decimals.
df_test_Person['Ratio'] = (df_test_Person['Target'] / df_test_Person['Count']).round(3)
# Proportion: share of all test rows that fall into this group.
df_test_Person['Proportion'] = df_test_Person['Count'] / df_test['Count'].sum()
# Tag the origin of the rows so they stay distinguishable after the union below.
df_test_Person['Sample'] = 'Test'
# Get proportion for train data.
# One row per distinct 'Person_no_drift' value; dropna=False keeps missing
# values as their own group instead of silently dropping those rows.
# The aggregation is requested by name ('sum'): handing np.sum to agg() is
# deprecated in recent pandas releases and emits a FutureWarning.
df_train_Person = (
    df_train.groupby(['Person_no_drift'], dropna=False, as_index=False)
    .agg({'Count': 'sum', 'Target_no_drift': 'sum'})
    # Rename explicitly so the test and train tables share column names
    # without relying on positional column order.
    .rename(columns={'Person_no_drift': 'Person', 'Target_no_drift': 'Target'})
)
# Ratio: summed target divided by the row count of the group, rounded to 3 decimals.
df_train_Person['Ratio'] = (df_train_Person['Target'] / df_train_Person['Count']).round(3)
# Proportion: share of all train rows that fall into this group.
df_train_Person['Proportion'] = df_train_Person['Count'] / df_train['Count'].sum()
# Tag the origin of the rows so they stay distinguishable after the union below.
df_train_Person['Sample'] = 'Train'
# Side-by-side view: outer join of the two per-person tables on 'Person'.
df_prop_test = df_test_Person.merge(
    df_train_Person,
    how='outer',
    on=['Person'],
)

# Union both samples (rows stacked) to prepare data for the proportions test.
df_union_all = pd.concat([df_test_Person, df_train_Person])

# Proportions z-test for the 'artist' rows: the summed targets are passed as
# the first argument and the row counts as the second.
# NOTE(review): presumably expects exactly one 'artist' row from each sample -
# confirm what should happen when a sample has no 'artist' rows.
_artist_rows = df_union_all[df_union_all['Person'] == 'artist']
stat, pval = proportions_ztest(_artist_rows['Target'], _artist_rows['Count'])

# Round both results to 4 decimals; the p-value is printed first, then the statistic.
pval, stat = pval.round(4), stat.round(4)
print(pval, ' ', stat)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment