Last active
December 7, 2015 23:04
-
-
Save brentp/172e213a87f3f6188891 to your computer and use it in GitHub Desktop.
calculate confidence limits of a proportion (an allele frequency)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt | |
def jeffreys_interval(af, n_samples, alpha=0.05): | |
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval | |
""" | |
>>> jeffreys_interval(0.2, 5) #doctest: +ELLIPSIS | |
(0.02251..., 0.6286...) | |
>>> jeffreys_interval(0.2, 10) #doctest: +ELLIPSIS | |
(0.0440..., 0.5027...) | |
>>> jeffreys_interval(0.2, 100) #doctest: +ELLIPSIS | |
(0.1307..., 0.2862...) | |
>>> jeffreys_interval(0.002, 1000) #doctest: +ELLIPSIS | |
(0.000415..., 0.00640...) | |
""" | |
from scipy.stats import beta | |
n_success = af * n_samples | |
l = beta.ppf(alpha / 2, a=n_success + 0.5, b=n_samples - n_success + 0.5) | |
u = beta.ppf(1 - alpha / 2, a=n_success + 0.5, b=n_samples - n_success + 0.5) | |
return l, u | |
def af_lims(af, n_samples, alpha=0.05): | |
""" | |
calculate the confiedence limits (alpha, 1 - alpha) of allele frequency | |
(af) from n_samples | |
Uses eqn 4 from: | |
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.408.7107&rep=rep1&type=pdf | |
AF of 0.2 with varying samples | |
>>> af_lims(0.2, 5) #doctest: +ELLIPSIS | |
(0.01052..., 0.70121...) | |
>>> af_lims(0.2, 10) #doctest: +ELLIPSIS | |
(0.0354..., 0.5578...) | |
>>> af_lims(0.2, 100) #doctest: +ELLIPSIS | |
(0.1292..., 0.2943...) | |
>>> af_lims(0.2, 1000) #doctest: +ELLIPSIS | |
(0.17590..., 0.2264...) | |
AF of 0.002 with varying samples | |
>>> af_lims(0.002, 5) #doctest: +ELLIPSIS | |
(0.01775..., 0.5389...) | |
>>> af_lims(0.002, 10) #doctest: +ELLIPSIS | |
(0.008328..., 0.3470...) | |
>>> af_lims(0.002, 100) #doctest: +ELLIPSIS | |
(0.000280154..., 0.049524...) | |
>>> af_lims(0.002, 1000) #doctest: +ELLIPSIS | |
(0.00034..., 0.00803...) | |
""" | |
alpha_to_z = { | |
0.1: 1.64485362695, | |
0.05: 1.96, | |
0.01: 2.57582930355, | |
0.001: 3.29052673149, | |
} | |
p, n = af, n_samples | |
z = alpha_to_z[alpha] | |
q = 1 - p | |
a = 2*n*p + z**2 | |
denom = 2 * (n + z**2) | |
try: | |
bL = z * sqrt(z**2 - 2 - 1.0 / n + 4*p*(n*q+1)) | |
L = (a - 1 - bL) / denom | |
except ValueError: | |
L = 0.0 | |
try: | |
bU = z * sqrt(z**2 + 2 - 1.0 / n + 4*p*(n*q-1)) | |
U = (a + 1 + bU) / denom | |
except ValueError: | |
U = 1.0 | |
return max(L, 0.0), min(U, 1.0) | |
if __name__ == "__main__": | |
import doctest | |
doctest.testmod() | |
import sys | |
if len(sys.argv) > 1: | |
n_exac_samples = 60706 | |
import sqlite3 | |
db = sqlite3.connect(sys.argv[1]) | |
db.row_factory = sqlite3.Row | |
cursor = db.cursor() | |
cols = ["aaf_adj_exac_afr", "aaf_adj_exac_amr", "aaf_adj_exac_eas", "aaf_adj_exac_nfe", "aaf_adj_exac_sas"] | |
try: | |
cursor.execute("ALTER table variants ADD COLUMN exac_af_lower_ci_max REAL") | |
print "adding 'exac_af_lower_ci_max' column" | |
except sqlite3.OperationalError: | |
print "modifying existing 'exac_af_lower_ci_max' column" | |
maxs = [] | |
for afs in cursor.execute("select %s from variants" % ", ".join(cols)): | |
# [0] pulls the lower CI. | |
cis = [af_lims(af, n_exac_samples)[0] for af in afs if af is not None and -1 < af] | |
if len(cis) == 0: | |
maxs.append(-1) | |
else: | |
maxs.append(max(cis)) | |
print "updating database with %d values" % len(maxs) | |
cursor.execute("BEGIN TRANSACTION") | |
for i, m in enumerate(maxs, start=1): | |
cursor.execute("UPDATE variants SET exac_af_lower_ci_max = %f WHERE variant_id = %d" % (m, i)) | |
db.commit() | |
cursor.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I believe there's a minor error in
af_lims
, n should be n_samples * 2 (or better yet, number of chromosomes or alleles called since you might be able to make haploid calls).