Skip to content

Instantly share code, notes, and snippets.

@brentp
Created March 11, 2017 01:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brentp/5371e0410759af8fdf6da17cc1c63acc to your computer and use it in GitHub Desktop.
Save brentp/5371e0410759af8fdf6da17cc1c63acc to your computer and use it in GitHub Desktop.
from scipy.stats import chisquare
import sys
def cnrand(counts):
"""
cnrand will return the chisquare p-value of the observed distribution
of copy-number genomes **of the same CN with 0...n copies of H47R
# test 4-copy genomes (0 ... 4)
# 1 0 copy, 1 3-copy and 1 4-copy
>>> cnrand([1, 0, 0, 1, 1])
>>> cnrand([0, 10, 0, 0, 0])
"""
total_alleles = sum(a * b for a, b in enumerate(counts))
#sys.stderr.write("total_alleles: %d\n" % total_alleles)
total_genomes = (len(counts) - 1) * sum(counts)
#sys.stderr.write("total_genomes: %d\n" % total_genomes)
af = total_alleles / float(total_genomes)
m = sum(counts) #/ float(len(counts) - 1)
expected = [1-af] + [af**i for i in range(1, len(counts))]
#sys.stderr.write("%s\n" % expected)
expected = [v * m for v in expected]
sys.stderr.write("%s\n" % expected)
assert len(counts) == len(expected)
return af, expected, chisquare(counts, expected)
if __name__ == "__main__":
import doctest
doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment