Last active
January 12, 2017 12:16
-
-
Save brentp/3803893 to your computer and use it in GitHub Desktop.
c-alpha test in python. copied from AssotesteR. and rpy2 wrapper for all functions in AssotesteR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
C-alpha test for rare variants. Copied from AssotesteR | |
Only reports the permutation p-value, not the asymptotic | |
This also wraps all of the functions in R's AssotesteR using | |
rpy2. Any function available in AssotesteR, e.g. CALPHA is | |
availailable here has r_calpha(). | |
The args are always | |
casecon: a list of 0's, 1's indicating case/control status | |
genotype: a numpy array with shape len(casecon), n_snps. | |
the return value is a dictionary with keys of: stat, perm_p, | |
asym_p, function. | |
# TODO: http://www.ncbi.nlm.nih.gov/pubmed/21769936 | |
""" | |
import numpy as np | |
import rpy2.robjects.numpy2ri as nri | |
from rpy2 import robjects | |
from rpy2.robjects.packages import importr | |
from rpy2.robjects.functions import SignatureTranslatedFunction | |
AssotesteR = importr("AssotesteR") | |
class rassotest(object): | |
def __init__(self, func_name): | |
self.function_name = func_name | |
self.fn = getattr(AssotesteR, func_name) | |
def __call__(self, casecon, genotype, **kwargs): | |
casecon = robjects.IntVector(casecon) | |
genotype = nri.numpy2ri(genotype) | |
res = self.fn(robjects.IntVector(casecon), nri.numpy2ri(genotype), **kwargs) | |
res = dict(res.iteritems()) | |
# convert from rpy2 stuffs to python dict. | |
skey = [k for k in res if k.endswith('.stat')][0] | |
perm_p = res['perm.pval'][0] | |
asym_p = res.get('asym.pval', [None])[0] | |
return dict(stat=res[skey][0], perm_p=perm_p, asym_p=asym_p, | |
function=self.function_name) | |
# wrapper for SKAT. | |
class PySKAT(object): | |
""" | |
covs is a list of covariates. each list in covs should be the same | |
length as cc | |
""" | |
def __init__(self, cc, covs=None): | |
if hasattr(covs, "__iter__") and not hasattr(covs[0], "__iter__"): | |
covs = [covs] | |
elif covs is None: | |
covs = [] | |
formula = "cc ~ 1 " | |
cov_string = " + ".join("V%i" % i for i, cov in enumerate(covs)) | |
if covs != []: | |
formula += " + " + cov_string | |
fml = robjects.Formula(formula) | |
fml.environment['cc'] = robjects.IntVector(cc) | |
for i, cov in enumerate(covs): | |
assert len(cov) == len(cc) | |
fml.environment["V%i" % i] = robjects.FloatVector(cov) | |
fml.environment['cc'] = robjects.IntVector(cc) | |
self.obj = SKAT.SKAT_Null_Model(fml) | |
self.formula = formula | |
def __call__(self, genotype): | |
_ = sys.stdout | |
sys.stdout = sys.stderr | |
sk_res = SKAT.SKAT(nri.numpy2ri(genotype.T), self.obj, method="optimal") | |
sys.stdout = _ | |
py_res = dict(sk_res.iteritems()) | |
skat_pval = py_res['p.value'][0] | |
params = dict(py_res['param'].iteritems()) | |
rho_est = params['rho_est'][0] | |
return skat_pval, rho_est | |
# these 2 lines are unnessecary. | |
# the below loop imports and wraps all of the R functions | |
r_calpha = rassotest("CALPHA") | |
r_cmat = rassotest("CMAT") | |
# import and wrap all the functions in AssotesteR | |
for name in dir(AssotesteR): | |
if isinstance(getattr(AssotesteR, name), SignatureTranslatedFunction): | |
fname = "r_" + name.lower() | |
globals()[fname] = rassotest(name) | |
# python version of calpha | |
def _calpha(casecon, genotype): | |
nCase = casecon.sum() | |
nControl = len(casecon) - nCase | |
pCase = nCase / float(nControl + nCase) | |
n = (genotype > 0).sum(0) | |
g = (genotype[casecon == 1,] > 0).sum(0) | |
Talpha = ((g - n * pCase)**2 - (n * pCase * (1 - pCase))).sum() | |
return Talpha | |
def calpha(casecon, genotype, n_perm=10000): | |
casecon = np.asarray(casecon) | |
calpha_stat = _calpha(casecon, genotype) | |
perm_stats = [] | |
rand_cc = casecon.copy() | |
for i in xrange(n_perm): | |
np.random.shuffle(rand_cc) | |
perm_stats.append(_calpha(rand_cc, genotype)) | |
perm_stats = np.array(perm_stats) ** 2 | |
return calpha_stat, (1 + (perm_stats > (calpha_stat ** 2)).sum()) / float(n_perm + 1) | |
def _cmat(casecon, g): | |
nCase = casecon.sum() | |
nCtrl = len(casecon) - nCase | |
mCase = g[casecon == 1,:].sum() | |
mCtrl = g[casecon == 0,:].sum() | |
iCase = (2 - g[casecon == 1,:]).sum() | |
iCtrl = (2 - g[casecon == 0,:]).sum() | |
cm1 = len(casecon) / (2. * nCase * nCtrl * g.shape[1]) | |
cm2 = ((mCase * iCtrl - mCtrl * iCase)**2) / \ | |
float((mCase + mCtrl) * (iCase + iCtrl)) | |
return cm1 * cm2 | |
def cmat(casecon, genotype, n_perm=100, rare_cutoff="auto"): | |
if rare_cutoff == "auto": | |
# cutoff from SKAT CommonRare | |
rare_cutoff = 1 / np.sqrt(2 * len(casecon)) | |
casecon = np.asarray(casecon) | |
# just do this once since we only shuffle case/control status | |
mafs = genotype.mean(0) / 2 | |
if any(mafs < rare_cutoff): | |
genotype = genotype[:, mafs < rare_cutoff] | |
else: | |
return (np.nan, np.nan) | |
cmat_stat = _cmat(casecon, genotype) | |
perm_stats = [] | |
rand_cc = casecon.copy() | |
for i in xrange(n_perm): | |
np.random.shuffle(rand_cc) | |
perm_stats.append(_cmat(rand_cc, genotype)) | |
perm_stats = np.array(perm_stats) | |
return cmat_stat, (1. + (perm_stats > cmat_stat).sum()) / float(1 + n_perm) | |
if __name__ == "__main__": | |
casecon = [0] * 100 + [1] * 100 | |
genotype = np.random.randint(0, 1, (len(casecon), 20)) | |
#genotype += np.random.randint(0, 1, (len(casecon), 20)) | |
genotype[120:130,1:5] = 1 | |
print cmat(casecon, genotype, rare_cutoff=0.05) | |
print(r_cmat(casecon, genotype, perm=100, maf=1/np.sqrt(2*len(casecon)))) | |
#print(r_cmc(casecon, genotype, perm=400)) | |
#print(r_asum(casecon, genotype, perm=400)) | |
#print(r_bst(casecon, genotype, perm=400)) | |
#print(r_wss(casecon, genotype, perm=400)) | |
#print(r_assuw(casecon, genotype, perm=400)) | |
#print(r_wst(casecon, genotype, perm=400)) | |
print "R calpha:" | |
print(r_calpha(casecon, genotype, perm=400)) | |
print "numpy calpha:" | |
print calpha(casecon, genotype) | |
print "numpy cmat:" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment