Skip to content

Instantly share code, notes, and snippets.

@brentp
Last active December 7, 2015 23:04
Show Gist options
  • Save brentp/172e213a87f3f6188891 to your computer and use it in GitHub Desktop.
Save brentp/172e213a87f3f6188891 to your computer and use it in GitHub Desktop.
calculate confidence limits of a proportion (an allele frequency)
from math import sqrt
def jeffreys_interval(af, n_samples, alpha=0.05):
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
"""
>>> jeffreys_interval(0.2, 5) #doctest: +ELLIPSIS
(0.02251..., 0.6286...)
>>> jeffreys_interval(0.2, 10) #doctest: +ELLIPSIS
(0.0440..., 0.5027...)
>>> jeffreys_interval(0.2, 100) #doctest: +ELLIPSIS
(0.1307..., 0.2862...)
>>> jeffreys_interval(0.002, 1000) #doctest: +ELLIPSIS
(0.000415..., 0.00640...)
"""
from scipy.stats import beta
n_success = af * n_samples
l = beta.ppf(alpha / 2, a=n_success + 0.5, b=n_samples - n_success + 0.5)
u = beta.ppf(1 - alpha / 2, a=n_success + 0.5, b=n_samples - n_success + 0.5)
return l, u
def af_lims(af, n_samples, alpha=0.05):
"""
calculate the confiedence limits (alpha, 1 - alpha) of allele frequency
(af) from n_samples
Uses eqn 4 from:
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.408.7107&rep=rep1&type=pdf
AF of 0.2 with varying samples
>>> af_lims(0.2, 5) #doctest: +ELLIPSIS
(0.01052..., 0.70121...)
>>> af_lims(0.2, 10) #doctest: +ELLIPSIS
(0.0354..., 0.5578...)
>>> af_lims(0.2, 100) #doctest: +ELLIPSIS
(0.1292..., 0.2943...)
>>> af_lims(0.2, 1000) #doctest: +ELLIPSIS
(0.17590..., 0.2264...)
AF of 0.002 with varying samples
>>> af_lims(0.002, 5) #doctest: +ELLIPSIS
(0.01775..., 0.5389...)
>>> af_lims(0.002, 10) #doctest: +ELLIPSIS
(0.008328..., 0.3470...)
>>> af_lims(0.002, 100) #doctest: +ELLIPSIS
(0.000280154..., 0.049524...)
>>> af_lims(0.002, 1000) #doctest: +ELLIPSIS
(0.00034..., 0.00803...)
"""
alpha_to_z = {
0.1: 1.64485362695,
0.05: 1.96,
0.01: 2.57582930355,
0.001: 3.29052673149,
}
p, n = af, n_samples
z = alpha_to_z[alpha]
q = 1 - p
a = 2*n*p + z**2
denom = 2 * (n + z**2)
try:
bL = z * sqrt(z**2 - 2 - 1.0 / n + 4*p*(n*q+1))
L = (a - 1 - bL) / denom
except ValueError:
L = 0.0
try:
bU = z * sqrt(z**2 + 2 - 1.0 / n + 4*p*(n*q-1))
U = (a + 1 + bU) / denom
except ValueError:
U = 1.0
return max(L, 0.0), min(U, 1.0)
if __name__ == "__main__":
import doctest
doctest.testmod()
import sys
if len(sys.argv) > 1:
n_exac_samples = 60706
import sqlite3
db = sqlite3.connect(sys.argv[1])
db.row_factory = sqlite3.Row
cursor = db.cursor()
cols = ["aaf_adj_exac_afr", "aaf_adj_exac_amr", "aaf_adj_exac_eas", "aaf_adj_exac_nfe", "aaf_adj_exac_sas"]
try:
cursor.execute("ALTER table variants ADD COLUMN exac_af_lower_ci_max REAL")
print "adding 'exac_af_lower_ci_max' column"
except sqlite3.OperationalError:
print "modifying existing 'exac_af_lower_ci_max' column"
maxs = []
for afs in cursor.execute("select %s from variants" % ", ".join(cols)):
# [0] pulls the lower CI.
cis = [af_lims(af, n_exac_samples)[0] for af in afs if af is not None and -1 < af]
if len(cis) == 0:
maxs.append(-1)
else:
maxs.append(max(cis))
print "updating database with %d values" % len(maxs)
cursor.execute("BEGIN TRANSACTION")
for i, m in enumerate(maxs, start=1):
cursor.execute("UPDATE variants SET exac_af_lower_ci_max = %f WHERE variant_id = %d" % (m, i))
db.commit()
cursor.close()
@jxchong
Copy link

jxchong commented Dec 7, 2015

I believe there's a minor error in af_lims, n should be n_samples * 2 (or better yet, number of chromosomes or alleles called since you might be able to make haploid calls).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment