Skip to content

Instantly share code, notes, and snippets.

@jeanbaptisteb
Created July 7, 2024 17:28
Show Gist options
  • Save jeanbaptisteb/9b1e4803132859213f603cda83a9cf69 to your computer and use it in GitHub Desktop.
Save jeanbaptisteb/9b1e4803132859213f603cda83a9cf69 to your computer and use it in GitHub Desktop.
F-test for the difference in means between two samples from exponential distributions
#This two-sided test compares the means of two samples coming from exponential distributions, and returns a p-value.
#It is generally more powerful than the Welch's t-test or the regular t-test, when sample sizes are equal.
#Avoid using it if sample sizes are unequal.
from scipy import stats
import numpy as np
import warnings
#Function to conduct the test
def f_test_expon_means(x, y):
#x is the first sample, y the second sample
ratio = np.mean(x) / np.mean(y)
n_x = len(x)
n_y = len(y)
if n_x != n_y:
warnings.warn("The sample sizes are unequal. Consider using the Welch's t-test instead: `stats.ttest_ind(x, y, equal_var=False)`")
pval = 1 - stats.f.sf(ratio, n_x*2, n_y*2)
pval = 2 * min([pval, 1-pval])
return pval
#Example:
#we generate two random sample with an exponential distribution, x and y:
x = stats.expon(scale=2).rvs(size=200, random_state=0)
y = stats.expon(scale=1.5).rvs(size=200, random_state=0)
#then we use the function on these two samples:
f_test_expon_means(x,y)
#0.0041035208895015
#And that's it!
#Simulation to compare power of Welch's t-test to the power of the F-test for means, with an alpha level of 0.05
#The sample sizes here are equal (150), so the F-test is a bit more powerful here
results_f = []
results_t = []
a = stats.expon(scale=1.5)
b = stats.expon(scale=1)
for i in range(10000):
x = a.rvs(150)
y = b.rvs(150)
p_f = f_test_expon_means(x,y)
results_f.append(p_f)
p_t = stats.ttest_ind(x, y, equal_var=False).pvalue
results_t.append(p_t)
results_f = np.array(results_f)
print("power of the F-test:", np.mean(results_f<0.05))
results_t = np.array(results_t)
print("power of the Welch's t-test:", np.mean(results_t<0.05))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment