Skip to content

Instantly share code, notes, and snippets.

@joshenders
Created March 30, 2015 06:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshenders/a4aa6fe0e83d7568d3e3 to your computer and use it in GitHub Desktop.
Save joshenders/a4aa6fe0e83d7568d3e3 to your computer and use it in GitHub Desktop.
stats-by-country.py
#!/usr/bin/python3.4
import math
import statistics as stats
import signal
import sys
from os.path import basename
# On Ctrl-C, leave quietly with exit status 1 rather than letting Python
# print a KeyboardInterrupt stack trace.
def _exit_on_sigint(signum, frame):
    """SIGINT handler: terminate the process with exit status 1."""
    sys.exit(1)


signal.signal(signal.SIGINT, _exit_on_sigint)
# Percentiles reported in the output, as whole percents so the nearest-rank
# arithmetic below stays in exact integers (no float rounding surprises).
_PERCENTILES = (25, 75, 85, 90, 95, 99)

# Placeholder printed for a statistic that is undefined for the given data.
_UNDEFINED = 'x'


def _read_samples(path):
    """Return the sorted integer samples from the 7th CSV column of *path*.

    Each value is truncated at its first '.', so "12.9" becomes 12.
    Blank lines are skipped; a non-blank line with fewer than seven
    comma-separated fields or a non-numeric value still raises
    IndexError / ValueError.
    """
    # The original 'rU' mode was removed in Python 3.11; plain text mode
    # already performs universal-newline translation.
    with open(path, 'r') as f:
        return sorted(int(line.split(',')[6].partition('.')[0])
                      for line in f if line.strip())


def _nearest_rank(samples, percent):
    """Return the *percent*-th percentile of sorted *samples* (nearest rank).

    The nearest-rank method selects the value at 1-based rank
    ceil(percent / 100 * N), which is 0-based index rank - 1.
    *samples* must be non-empty and sorted ascending.
    """
    rank = -(-len(samples) * percent // 100)  # integer ceiling division
    return samples[max(rank, 1) - 1]


def _ratio(numerator, denominator):
    """Return numerator / denominator, or None when the result is undefined."""
    if numerator is None or not denominator:
        return None
    return numerator / denominator


def _fmt(spec, value):
    """Format *value* with %-style *spec*, or the placeholder if it is None."""
    return _UNDEFINED if value is None else spec % value


def main():
    """Print a tab-separated statistics report for the file named in argv[1].

    The country code is the input file's basename up to its first '.'.
    Output is a header line followed by one data row holding the
    percentiles, mean, median, mode, maximum, sample standard deviation,
    coefficient of variation, relative standard deviation, the
    median-normalized interquartile range ((p75 - p25) / median) and the
    sample count.  Statistics that are undefined for the data (for
    example the standard deviation of a single sample, or a ratio with a
    zero denominator) are printed as 'x'.

    Exits with a usage message when the argument count is wrong, and with
    an error message when the input contains no samples.
    """
    prog = basename(sys.argv[0])
    if len(sys.argv) != 2:
        sys.exit("%s: <infile>" % prog)

    path = sys.argv[1]
    # Country codes conform to format defined by ISO 3166-1 alpha-2
    ccode = basename(path).split('.')[0]

    samples = _read_samples(path)
    if not samples:
        sys.exit("%s: %s: no samples found" % (prog, path))

    total = len(samples)
    maximum = samples[-1]
    percentile = {p: _nearest_rank(samples, p) for p in _PERCENTILES}

    # Misc stats
    try:
        mode = stats.mode(samples)
    except stats.StatisticsError:
        # Raised on Python < 3.8 when there is no unique most-common value.
        mode = _UNDEFINED
    try:
        sigma = stats.stdev(samples)
    except stats.StatisticsError:
        # Sample standard deviation needs at least two data points.
        sigma = None
    mean = stats.mean(samples)
    median = stats.median(samples)
    cv = _ratio(sigma, mean)
    rsd = None if cv is None else cv * 100
    iqr = _ratio(percentile[75] - percentile[25], median)

    # Header
    print("Country\tp25\tp75\tp85\tp90\tp95\tp99\tMean\tMedian\tMode\tMax\t"
          "Sigma\tCV\t%RSD\tIQR\tSamples")

    # Data row: country, percentiles, then the remaining statistics.
    fields = [ccode]
    fields.extend("%d" % percentile[p] for p in _PERCENTILES)
    fields.extend([
        "%d" % mean,
        "%d" % median,
        "%s" % mode,
        "%d" % maximum,
        _fmt("%d", sigma),
        _fmt("%0.4f", cv),
        _fmt("%d%%", rsd),
        _fmt("%0.4f", iqr),
        "%d" % total,
    ])
    print("\t".join(fields))
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment