tleonardi/combine_pvalues_hou.py

## combine_pvalues_hou.py
from scipy.stats import chi2
import numpy as np

def combine_pvalues_hou(pvalues, weights, cor_mat):
    """ Hou's method for the approximation for the distribution of the weighted
        combination of non-independent or independent probabilities.
        If any pvalue is nan, returns nan.
        https://doi.org/10.1016/j.spl.2004.11.028
        pvalues: list of pvalues to be combined
        weights: the weights of the pvalues
        cor_mat: a matrix containing the correlation coefficients between pvalues
        Test: when weights are equal and cor=0, hou is the same as Fisher
        print(combine_pvalues([0.1,0.02,0.1,0.02,0.3], method='fisher')[1])
        print(hou([0.1,0.02,0.1,0.02,0.3], [1,1,1,1,1], np.zeros((5,5))))
    """
    if(len(pvalues) != len(weights)):
        raise Exception("Can't combine pvalues if pvalues and weights are not the same length.")
    if( cor_mat.shape[0] != cor_mat.shape[1] or cor_mat.shape[0] != len(pvalues)):
        raise Exception("The correlation matrix needs to be squared, with each dimension equal to the length of the pvalues vector.")
    if all(p==1 for p in pvalues):
        return 1
    if any((p==0 or np.isinf(p) or p>1) for p in pvalues):
        raise Exception("At least one p-value is invalid")

    # Covariance estimation as in Kost and McDermott (eq:8)
    # https://doi.org/10.1016/S0167-7152(02)00310-3
    cov = lambda r: (3.263*r)+(0.710*r**2)+(0.027*r**3)
    k=len(pvalues)
    cov_sum=np.float64(0)
    sw_sum=np.float64(0)
    w_sum=np.float64(0)
    tau=np.float64(0)
    for i in range(k):
        for j in range(i+1,k):
            cov_sum += weights[i]*weights[j]*cov(cor_mat[i][j])
        sw_sum += weights[i]**2
        w_sum += weights[i]
        # Calculate the weighted Fisher's combination statistic
        tau += weights[i] * (-2*np.log(pvalues[i]))
    # Correction factor
    c = (2*sw_sum+cov_sum) / (2*w_sum)
    # Degrees of freedom
    f = (4*w_sum**2) / (2*sw_sum+cov_sum)
    # chi2.sf is the same as 1-chi2.cdf but is more accurate
    combined_p_value = chi2.sf(tau/c,f)
    # Return a very small number if pvalue = 0
    if combined_p_value == 0:
        combined_p_value = np.finfo(np.float).tiny
    return combined_p_value
	from scipy.stats import chi2
	import numpy as np

	def combine_pvalues_hou(pvalues, weights, cor_mat):
	""" Hou's method for the approximation for the distribution of the weighted
	combination of non-independent or independent probabilities.
	If any pvalue is nan, returns nan.
	https://doi.org/10.1016/j.spl.2004.11.028
	pvalues: list of pvalues to be combined
	weights: the weights of the pvalues
	cor_mat: a matrix containing the correlation coefficients between pvalues
	Test: when weights are equal and cor=0, hou is the same as Fisher
	print(combine_pvalues([0.1,0.02,0.1,0.02,0.3], method='fisher')[1])
	print(hou([0.1,0.02,0.1,0.02,0.3], [1,1,1,1,1], np.zeros((5,5))))
	"""
	if(len(pvalues) != len(weights)):
	raise Exception("Can't combine pvalues if pvalues and weights are not the same length.")
	if( cor_mat.shape[0] != cor_mat.shape[1] or cor_mat.shape[0] != len(pvalues)):
	raise Exception("The correlation matrix needs to be squared, with each dimension equal to the length of the pvalues vector.")
	if all(p==1 for p in pvalues):
	return 1
	if any((p==0 or np.isinf(p) or p>1) for p in pvalues):
	raise Exception("At least one p-value is invalid")

	# Covariance estimation as in Kost and McDermott (eq:8)
	# https://doi.org/10.1016/S0167-7152(02)00310-3
	cov = lambda r: (3.263r)+(0.710r*2)+(0.027r**3)
	k=len(pvalues)
	cov_sum=np.float64(0)
	sw_sum=np.float64(0)
	w_sum=np.float64(0)
	tau=np.float64(0)
	for i in range(k):
	for j in range(i+1,k):
	cov_sum += weights[i]weights[j]cov(cor_mat[i][j])
	sw_sum += weights[i]**2
	w_sum += weights[i]
	# Calculate the weighted Fisher's combination statistic
	tau += weights[i] * (-2*np.log(pvalues[i]))
	# Correction factor
	c = (2sw_sum+cov_sum) / (2w_sum)
	# Degrees of freedom
	f = (4w_sum2) / (2sw_sum+cov_sum)
	# chi2.sf is the same as 1-chi2.cdf but is more accurate
	combined_p_value = chi2.sf(tau/c,f)
	# Return a very small number if pvalue = 0
	if combined_p_value == 0:
	combined_p_value = np.finfo(np.float).tiny
	return combined_p_value