Skip to content

Instantly share code, notes, and snippets.

@vsbuffalo
Created September 26, 2013 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsbuffalo/6718274 to your computer and use it in GitHub Desktop.
Save vsbuffalo/6718274 to your computer and use it in GitHub Desktop.
Version of the entropy function we wrote in class.
from __future__ import division
from collections import Counter
from math import log
def entropy(seq, unit="bit"):
    """
    Return the Shannon entropy of the symbols in a sequence.

    The entropy formula is:

        entropy = -sum_i (log(p_i) * p_i)

    where p_i is the relative frequency of the i-th distinct symbol.
    Every element of ``seq`` is counted as a symbol, so whitespace in a
    string contributes to the result just like any other character.

    Parameters:
        seq: iterable of hashable symbols (e.g. a DNA string).
        unit: base of the logarithm -- "bit" (base 2), "nat" (base e)
            or "dit" (base 10).

    Returns:
        The entropy in the requested unit; 0 for an empty sequence.

    Raises:
        ValueError: if ``unit`` is not one of "bit", "nat" or "dit".
    """
    if unit == "bit":
        logfun = lambda x: log(x, 2)
    elif unit == "nat":
        logfun = lambda x: log(x)
    elif unit == "dit":
        logfun = lambda x: log(x, 10)
    else:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise ValueError("unit must be bit, nat, or dit")

    counts = Counter(seq)
    # Total from the counts (not len(seq)) so one-shot iterables work too;
    # float() keeps the division true even without the __future__ import.
    total = float(sum(counts.values()))

    # An empty sequence yields an empty sum, so no division by zero occurs.
    probs = [count / total for count in counts.values()]
    return -sum(logfun(p) * p for p in probs)
if __name__ == "__main__":
    # Demo: entropy of one sequence in each supported unit.  The spaces in
    # the string are symbols too, since entropy() counts every character.
    test_seq = "GA GA GA GA GT GA GA GA GA GT CG CG GA GA"
    # %-formatting prints the same text under Python 2 and Python 3.
    print("bits %s" % entropy(test_seq, "bit"))
    print("dits %s" % entropy(test_seq, "dit"))
    print("nat %s" % entropy(test_seq, "nat"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment