Skip to content

Instantly share code, notes, and snippets.

@hmml
Last active January 1, 2016 13:29
Show Gist options
  • Save hmml/8151335 to your computer and use it in GitHub Desktop.
Save hmml/8151335 to your computer and use it in GitHub Desktop.
Benford's law in action.
import os, sys, time
def print_stats(buckets):
    """Print the observed leading-digit distribution next to Benford's Law.

    buckets: sequence of counts, where ``buckets[i]`` is the number of
        samples whose leading digit is ``i + 1``.

    Writes a tab-separated table (digit, observed fraction, Benford
    fraction, Benford minus observed) to stdout. If there are no samples
    at all, or more buckets than there are reference digits, the single
    line 'Error calculating stats...' is printed instead. Returns None.
    """
    # source: http://mathworld.wolfram.com/BenfordsLaw.html
    benford = (0.30103, 0.176091, 0.124939, 0.09691, 0.0791812,
               0.0669468, 0.0579919, 0.0511525, 0.0457575)

    # Sum once instead of once per bucket.
    total = sum(buckets)

    # Guard the known failure modes explicitly rather than hiding every
    # possible exception behind a bare ``except``.
    if total == 0 or len(buckets) > len(benford):
        print('Error calculating stats...')
        return

    print('Digit\tResult\tBenford\tDifference')
    for digit, (count, expected) in enumerate(zip(buckets, benford), 1):
        observed = float(count) / total
        print('%d\t%.4f\t%.4f\t%+.4f'
              % (digit, observed, expected, expected - observed))
def do_stats(start_dir):
    """Tally the leading digits of file sizes under *start_dir* and report.

    start_dir: directory whose tree is walked with ``os.walk``.

    Every file with a non-zero size adds one to the bucket of its size's
    leading decimal digit (1-9). Zero-length files are ignored, as are
    files that cannot be stat'ed. A progress line is printed every 512
    counted files; afterwards the sample count, the start directory and
    the elapsed time are printed and the buckets are handed to
    ``print_stats``. Returns None.
    """
    count = 0
    buckets = [0] * 9
    start_time = time.time()

    for root, _dirs, files in os.walk(start_dir):
        for fname in files:
            try:
                size = os.stat(os.path.join(root, fname)).st_size
            except OSError:
                # Best effort: skip entries that cannot be stat'ed, but
                # let Ctrl-C and genuine bugs propagate.
                continue

            if size == 0:
                # A zero size has no leading digit between 1 and 9.
                continue

            count += 1
            if count % 512 == 0:
                print('\rExamined files: %d' % count)
            buckets[int(str(size)[0]) - 1] += 1

    print('Total samples: %d' % count)
    print('Start directory: %s' % start_dir)
    print('Total time: %0.2f seconds' % (time.time() - start_time))
    print_stats(buckets)
if __name__ == '__main__':
    # Command-line entry point: expects exactly one meaningful argument,
    # the directory to scan. With no argument, or with '-h' as the first
    # argument, show the usage text and exit with status 1.
    args = sys.argv[1:]
    if not args or args[0] == '-h':
        script = os.path.basename(sys.argv[0])
        print('Examine file sizes from given directory to check Benford\'s Law.')
        print('For more information see: http://mathworld.wolfram.com/BenfordsLaw.html')
        print('')
        print('Usage: %s <path>' % script)
        print('Example: %s c:/' % script)
        sys.exit(1)

    do_stats(args[0])
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment