Skip to content

Instantly share code, notes, and snippets.

@JudoWill
Created June 28, 2011 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JudoWill/1051335 to your computer and use it in GitHub Desktop.
Save JudoWill/1051335 to your computer and use it in GitHub Desktop.
Using logarithms to do hypergeometric tests with large N
from math import log, exp
from mpmath import loggamma
def logchoose(ni, ki):
try:
lgn1 = loggamma(ni+1)
lgk1 = loggamma(ki+1)
lgnk1 = loggamma(ni-ki+1)
except ValueError:
#print ni,ki
raise ValueError
return lgn1 - (lgnk1 + lgk1)
def gauss_hypergeom(X, n, m, N):
"""Returns the probability of drawing X successes of m marked items
in n draws from a bin of N total items."""
assert N >= m, 'Number of items %i must be larger than the number of marked items %i' % (N, m)
assert m >= X, 'Number of marked items %i must be larger than the number of sucesses %i' % (m, X)
assert n >= X, 'Number of draws %i must be larger than the number of sucesses %i' % (n, X)
assert N >= n, 'Number of draws %i must be smaller than the total number of items %i' % (n, N)
r1 = logchoose(m, X)
try:
r2 = logchoose(N-m, n-X)
except ValueError:
return 0
r3 = logchoose(N,n)
return exp(r1 + r2 - r3)
def hypergeo_cdf(X, n, m, N):
assert N >= m, 'Number of items %i must be larger than the number of marked items %i' % (N, m)
assert m >= X, 'Number of marked items %i must be larger than the number of sucesses %i' % (m, X)
assert n >= X, 'Number of draws %i must be larger than the number of sucesses %i' % (n, X)
assert N >= n, 'Number of draws %i must be smaller than the total number of items %i' % (n, N)
assert N-m >= n-X, 'There are more failures %i than unmarked items %i' % (N-m, n-X)
s = 0
for i in range(1, X+1):
s += max(gauss_hypergeom(i, n, m, N), 0.0)
return min(max(s,0.0), 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment