This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# Reads a tab-separated file whose lines look like: <count><TAB><raw browser string>
# and processes it to output lines of the form:
# <percentage>% <normalized browser string>
import sys | |
import md5 | |
from ua_parser import user_agent_parser | |
# beautify ua | |
def nice(ua):
    """Build a short, human-readable label from a parsed user agent.

    Args:
        ua: Mapping that holds a ``'user_agent'`` entry (with ``'family'``
            and ``'major'`` keys) and an ``'os'`` entry (with ``'family'``,
            ``'major'`` and ``'minor'`` keys).  Presumably the result of
            ``user_agent_parser.Parse`` -- confirm against the caller.

    Returns:
        ``"<browser family> <browser major> <os family> "`` followed by
        ``"<os major>.<os minor>"`` when both OS version parts are known.
        The space after the OS family is always present.
    """
    browser = ua['user_agent']
    os_info = ua['os']
    label = "{0} {1} {2} ".format(
        browser['family'], browser['major'], os_info['family'])
    # Both version parts must be present.  The previous check tested the
    # major version twice, so a missing minor version leaked into the label
    # as the literal text "None" (e.g. "10.None").
    if os_info['major'] is not None and os_info['minor'] is not None:
        label += "{0}.{1}".format(os_info['major'], os_info['minor'])
    return label
def percentage(c, total):
    """Return ``c`` expressed as a percentage of ``total``.

    Args:
        c: Count for one bucket.
        total: Sum of all counts; must be non-zero.

    Returns:
        ``c * 100 / total`` as a float.

    Raises:
        ZeroDivisionError: If ``total`` is zero.
    """
    # Coerce to float first: with the Python 2 "/" operator two integer
    # arguments would otherwise be floor-divided (1 of 3 -> 33, not 33.33).
    return float(c) * 100 / total
# Fail with a readable message instead of a bare IndexError when the
# input path is missing.
if len(sys.argv) < 2:
    sys.exit("usage: {0} <counts-file>".format(sys.argv[0]))
name = sys.argv[1]

# Maps each normalized browser string to its accumulated count.
b = {}
total = 0.0

# The with-block closes the input file even if a line fails to parse.
with open(name) as f:
    for l in f:
        # Tolerate blank lines (e.g. a trailing empty line at end of file).
        if not l.strip():
            continue
        # Drop the line terminator so it is not handed to the parser, and
        # split on the first tab only so a tab inside the user-agent string
        # does not truncate it.
        items = l.rstrip("\r\n").split("\t", 1)
        # number of occurrences
        c = float(items[0])
        total += c
        s = nice(user_agent_parser.Parse(items[1]))
        # The normalized string is hashable, so it serves as the dict key
        # directly; an MD5 digest of it adds nothing.
        b[s] = b.get(s, 0.0) + c

# One output line per distinct normalized browser string; the order is
# whatever the dict yields.
for s, c in b.items():
    percentage_string = '{:.2f}'.format(percentage(c, total))
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("{0}% {1}".format(percentage_string, s))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment