# jensdebruijn/corrected_dependent_ttest.py

## corrected_dependent_ttest.py
# Python implementation of the Nadeau and Bengio correction of the dependent (paired) Student's t-test,
# using the equation stated in https://www.cs.waikato.ac.nz/~eibe/pubs/bouckaert_and_frank.pdf

from scipy.stats import t
from math import sqrt
from statistics import stdev

def corrected_dependent_ttest(data1, data2, n_training_samples, n_test_samples, alpha):
    """Compute the corrected dependent (paired) t-test for two score sequences.

    The statistic is the mean of the pairwise differences divided by
    ``sqrt(1 / n + n_test_samples / n_training_samples) * sd``, where ``n`` is
    the number of pairs and ``sd`` is the sample standard deviation of the
    differences.

    Args:
        data1: Sequence of numeric scores for the first method.
        data2: Sequence of numeric scores for the second method; must have
            the same length as ``data1``.
        n_training_samples: Size of the training set (denominator of the
            test/training ratio in the variance correction).
        n_test_samples: Size of the test set (numerator of that ratio).
        alpha: Significance level used for the critical value.

    Returns:
        A tuple ``(t_stat, df, cv, p)``: the corrected t statistic, the
        degrees of freedom (``n - 1``), the ``1 - alpha`` quantile of the
        t distribution, and the two-sided p-value.

    Raises:
        ValueError: If ``data1`` and ``data2`` differ in length.
        statistics.StatisticsError: If fewer than two pairs are given
            (raised by ``stdev``).
        ZeroDivisionError: If ``n_training_samples`` is zero, or if the
            differences have zero standard deviation.
    """
    n = len(data1)
    if len(data2) != n:
        raise ValueError(
            "data1 and data2 must have the same length "
            f"(got {n} and {len(data2)})"
        )

    differences = [a - b for a, b in zip(data1, data2)]
    mean_difference = sum(differences) / n
    sd = stdev(differences)

    # The test/training ratio inflates the variance estimate to account for
    # the overlap between training sets across resampled runs.
    test_training_ratio = n_test_samples / n_training_samples
    denominator = sqrt(1 / n + test_training_ratio) * sd
    t_stat = mean_difference / denominator

    # degrees of freedom
    df = n - 1
    # NOTE(review): this is the one-sided (1 - alpha) quantile, while the
    # p-value below is two-sided; kept as in the original -- confirm intent.
    cv = t.ppf(1.0 - alpha, df)
    # Two-sided p-value; the survival function avoids the cancellation that
    # 1 - cdf suffers for large |t_stat|.
    p = 2.0 * t.sf(abs(t_stat), df)
    return t_stat, df, cv, p