Created
May 3, 2019 10:58
-
-
Save domdfcoding/6b437829e2c3d6e01dcb3307a89d5ba3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# Additional data analysis to accompany | |
# "Improving the Analysis of Organic Gunshot Residue with GunShotMatch" | |
# | |
# Copyright 2019 Dominic Davis-Foster <dominic@davis-foster.co.uk> | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
# MA 02110-1301, USA. | |
# | |
# | |
# Based on: | |
# http://cleverowl.uk/2015/07/01/using-one-way-anova-and-tukeys-test-to-compare-data-sets/ | |
# https://www.psychometrica.de/effect_size.html#transform | |
# https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm | |
# and Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd Edition). Hillsdale, NJ: Lawrence Erlbaum Associates | |
import numpy as np | |
import scipy.stats as stats | |
from statsmodels.stats.multicomp import pairwise_tukeyhsd | |
from statsmodels.stats.multicomp import MultiComparison | |
def pooled_sd(sample1, sample2, weighted=False): | |
"""Formula from weighted=True for weighted pooled SD | |
Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm""" | |
sd1 = np.std(sample1) | |
sd2 = np.std(sample2) | |
n1 = len(sample1) | |
n2 = len(sample2) | |
if weighted: | |
return np.sqrt( (((n1-1)*(sd1**2)) + ((n2-1)*(sd2**2))) / (n1+n2-2) ) | |
else: | |
return np.sqrt( ((sd1**2) + (sd2**2)) / 2 ) | |
def g_hedge(sample1, sample2): | |
"""Hedge's g statistic | |
Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm""" | |
mean1 = np.mean(sample1) | |
mean2 = np.mean(sample2) | |
return (mean1-mean2)/pooled_sd(sample1, sample2, True) | |
def g_durlak_bias(g, n): | |
"""Application of Durlak's bias correction to the Hedge's g statistic. | |
Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm | |
n = n1+n2""" | |
Durlak = ((n-3)/(n-2.25))*np.sqrt((n-2)/n) | |
return g*Durlak | |
def interpret_d(d_or_g): | |
"""Interpret Cohen's d or Hedge's g values using Table 1 | |
from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3444174/""" | |
if d_or_g < 0: | |
return f"{interpret_d(np.abs(d_or_g)).split(' ')[0]} Adverse Effect" | |
elif 0.0 <= d_or_g < 0.2: | |
return "No Effect" | |
elif 0.2 <= d_or_g < 0.5: | |
return "Small Effect" | |
elif 0.5 <= d_or_g < 0.8: | |
return "Intermediate Effect" | |
elif 0.8 <= d_or_g: | |
return "Large Effect" | |
"""Dibutyl Phthalate ANOVA""" | |
CBC = [193540.383333333, 156797.01984127, 877545.732539682, 185331.853968254, 198447.588095238] | |
Magtech = [388224.406349206, 1299156.4515873, 775447.952380952, 797549.789682539, 1469425.29206349] | |
Magtech_CleanRange = [190675.011904762, 183011.074603174, 138076.892063492, 112654.025396825, 278997.476190476] | |
a_value = 0.05 | |
f, p = stats.f_oneway(CBC, Magtech, Magtech_CleanRange) | |
print ('Dibutyl Phthalate One-way ANOVA') | |
print ('===============================') | |
print ('F value:', f) | |
print ('P value:', p) | |
print("Reject = ",end='') | |
print("False" if p > a_value else "True") | |
print('') | |
"""Tukey's""" | |
data = [] | |
for area in CBC: | |
data.append(("CBC", area)) | |
for area in Magtech: | |
data.append(("Magtech", area)) | |
for area in Magtech_CleanRange: | |
data.append(("Magtech CleanRange", area)) | |
data = np.rec.array(data, dtype = [('Sample',"|U20"),('Peak Area','float64')]) | |
mc = MultiComparison(data['Peak Area'], data['Sample']) | |
print(mc.tukeyhsd()) | |
print("") | |
print("""Hedge's g\n""") | |
print("CBC v Magtech") | |
g = g_hedge(CBC, Magtech) | |
print(g) | |
print(interpret_d(g_durlak_bias(g, 10))) | |
print("\nCBC v Magtech CleanRange") | |
g = g_hedge(CBC, Magtech_CleanRange) | |
print(g) | |
print(interpret_d(g_durlak_bias(g, 10))) | |
print("\nMagtech v Magtech CleanRange") | |
g = g_hedge(Magtech, Magtech_CleanRange) | |
print(g) | |
print(interpret_d(g_durlak_bias(g, 10))) | |
print("\n") | |
print("""Student's t-test for DBP: CBC v CleanRange""") | |
t, p = stats.ttest_ind_from_stats(np.mean(CBC), np.std(CBC), len(CBC), | |
np.mean(Magtech_CleanRange), | |
np.std(Magtech_CleanRange), | |
len(Magtech_CleanRange)) | |
print ('t Statistic:', f) | |
print ('P value: ', p) | |
print("Reject = ",end='') | |
print("False" if p > a_value else "True") | |
print('') | |
print("""Welch's t-test for DBP: CBC v CleanRange""") | |
t, p = stats.ttest_ind_from_stats(np.mean(CBC), np.std(CBC), len(CBC), | |
np.mean(Magtech_CleanRange), | |
np.std(Magtech_CleanRange), | |
len(Magtech_CleanRange), False) | |
print ('t Statistic:', f) | |
print ('P value: ', p) | |
print("Reject = ",end='') | |
print("False" if p > a_value else "True") | |
print('') | |
print("=========================================================================") | |
print("\n\n") | |
print("""Ethyl Centralite Hedge's g""") | |
CBC = [2360313.56904762,9205011.71269841,4368357.48412698,6818562.4468254,12065016.2349207] | |
Magtech = [112587.645238095,532979.707936508,422138.485714286,436944.629365079,772629.48015873] | |
g = g_hedge(CBC, Magtech) | |
print(g) | |
print(interpret_d(g_durlak_bias(g, 10))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment