Skip to content

Instantly share code, notes, and snippets.

@wil3
Created February 5, 2016 20:32
Show Gist options
  • Save wil3/ce520f9bc7f3745c4dfc to your computer and use it in GitHub Desktop.
Save wil3/ce520f9bc7f3745c4dfc to your computer and use it in GitHub Desktop.
import cStringIO
import gzip
import io
import os
import shutil
from optparse import OptionParser
"""
Estimate the average entropy (approximated by the gzip compression ratio) of all files found under the given directories
"""
def entropy(f):
    """Estimate the entropy of a file as its gzip compression ratio.

    The estimate is ``compressed size / original size``; see
    https://www.cs.uaf.edu/2013/spring/cs463/lecture/01_25_entropy.html

    Args:
        f: path of the file to measure (it is opened in binary mode).

    Return:
        The ratio as a float, or None when the file is empty and the ratio
        is therefore undefined. Highly compressible data yields a value near
        0. Because the gzip stream carries a fixed header and trailer, tiny
        or incompressible files can produce a value slightly above 1.

    Raises:
        OSError / IOError: if the file cannot be stat'ed or read.
    """
    original_size = os.path.getsize(f)
    if original_size == 0:
        # Nothing to compress: avoid dividing by zero.
        return None

    # Compress into memory; only the resulting byte count is needed.
    compressed = io.BytesIO()
    with open(f, 'rb') as f_in:
        with gzip.GzipFile(filename='', mode='wb', fileobj=compressed) as f_out:
            shutil.copyfileobj(f_in, f_out)

    # Leaving the GzipFile context flushes the remaining data and the gzip
    # trailer into the buffer, so measure only after the block has exited.
    compressed.seek(0, os.SEEK_END)
    size_compressed = compressed.tell()

    # float() keeps this a true division on Python 2 as well.
    return size_compressed / float(original_size)
def averageEntropy(startDirs):
    """Average the entropy estimate of every file under the given directories.

    Each directory is walked recursively with os.walk and entropy() is
    called on every file found. Files for which entropy() raises are
    reported on stdout and skipped; files for which it returns None are
    reported and skipped as well.

    Args:
        startDirs: iterable of directory paths to walk.

    Return:
        Tuple ``(average, count)`` where ``count`` is the number of files
        that contributed to the average. When no file could be measured the
        result is ``(0.0, 0)`` instead of a division-by-zero error.
    """
    numComputed = 0
    accumEntropy = 0.0
    for startDir in startDirs:
        for root, _dirs, files in os.walk(startDir):
            for filename in files:
                path = os.path.join(root, filename)
                try:
                    ratio = entropy(path)
                except Exception as err:
                    # Best effort: one unreadable file must not abort the
                    # whole scan, so report it and move on.
                    print(err)
                    continue
                if ratio is None:
                    print("Skipping {}: entropy undefined".format(path))
                    continue
                accumEntropy += ratio
                numComputed += 1

    if numComputed == 0:
        # No measurable files: the average is undefined, report zero.
        return (0.0, 0)
    return (accumEntropy / numComputed, numComputed)
if __name__ == "__main__":
    # Command-line entry point: every positional argument is a directory
    # whose files are included in the average.
    parser = OptionParser(usage="usage: %prog DIR [DIR ...]")
    (_options, args) = parser.parse_args()
    if not args:
        # Without a directory there is nothing to average; print the usage
        # text and exit instead of failing later with a traceback.
        parser.error("at least one directory is required")
    (e, c) = averageEntropy(args)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Average entropy from {} binaries = {}".format(c, e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment