Skip to content

Instantly share code, notes, and snippets.

@tleonardi
Last active October 17, 2019 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tleonardi/d80ec68a8240088a45894bc0f89c066e to your computer and use it in GitHub Desktop.
Save tleonardi/d80ec68a8240088a45894bc0f89c066e to your computer and use it in GitHub Desktop.
Hou's method for the approximation for the distribution of the weighted combination of non-independent or independent probabilities.
from scipy.stats import chi2
import numpy as np
def combine_pvalues_hou(pvalues, weights, cor_mat):
""" Hou's method for the approximation for the distribution of the weighted
combination of non-independent or independent probabilities.
If any pvalue is nan, returns nan.
https://doi.org/10.1016/j.spl.2004.11.028
pvalues: list of pvalues to be combined
weights: the weights of the pvalues
cor_mat: a matrix containing the correlation coefficients between pvalues
Test: when weights are equal and cor=0, hou is the same as Fisher
print(combine_pvalues([0.1,0.02,0.1,0.02,0.3], method='fisher')[1])
print(hou([0.1,0.02,0.1,0.02,0.3], [1,1,1,1,1], np.zeros((5,5))))
"""
if(len(pvalues) != len(weights)):
raise Exception("Can't combine pvalues if pvalues and weights are not the same length.")
if( cor_mat.shape[0] != cor_mat.shape[1] or cor_mat.shape[0] != len(pvalues)):
raise Exception("The correlation matrix needs to be squared, with each dimension equal to the length of the pvalues vector.")
if all(p==1 for p in pvalues):
return 1
if any((p==0 or np.isinf(p) or p>1) for p in pvalues):
raise Exception("At least one p-value is invalid")
# Covariance estimation as in Kost and McDermott (eq:8)
# https://doi.org/10.1016/S0167-7152(02)00310-3
cov = lambda r: (3.263*r)+(0.710*r**2)+(0.027*r**3)
k=len(pvalues)
cov_sum=np.float64(0)
sw_sum=np.float64(0)
w_sum=np.float64(0)
tau=np.float64(0)
for i in range(k):
for j in range(i+1,k):
cov_sum += weights[i]*weights[j]*cov(cor_mat[i][j])
sw_sum += weights[i]**2
w_sum += weights[i]
# Calculate the weighted Fisher's combination statistic
tau += weights[i] * (-2*np.log(pvalues[i]))
# Correction factor
c = (2*sw_sum+cov_sum) / (2*w_sum)
# Degrees of freedom
f = (4*w_sum**2) / (2*sw_sum+cov_sum)
# chi2.sf is the same as 1-chi2.cdf but is more accurate
combined_p_value = chi2.sf(tau/c,f)
# Return a very small number if pvalue = 0
if combined_p_value == 0:
combined_p_value = np.finfo(np.float).tiny
return combined_p_value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment