Last active
August 29, 2015 14:14
-
-
Save nuria/072a6766440b356364b7 to your computer and use it in GitHub Desktop.
Calculates user agent percentages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
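#!/usr/lib/python
# Calculates user agent percentages.
#
# Reads the input file, which has one record per line: a JSON description
# of a user agent followed by the number of times it was seen (the two
# fields are separated by a tab character), for example:
# {"browser_major":"1","os_family":"Android","os_major":"1","device_family":"Opus One","browser_family":"Android","os_minor":"5"} 5
# {"browser_major":"1","os_family":"Android","os_major":"4","device_family":"icube 900","browser_family":"Baidu Explorer","os_minor":"2"} 1
#
# Stores the count of every user agent, then loops over the stored values
# and prints the percentage of the total that each one represents, for
# percentages that are over a certain threshold.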
import heapq as hp
import md5
import sys
# Input file read when no path is given on the command line.
DEFAULT_INPUT_PATH = './output-tablesample.sql'

# Percentages (after rounding to 4 decimals) below this value are not printed.
DEFAULT_THRESHOLD = 0.0001


def read_counts(lines):
    """Return a dict mapping each record to its summed count.

    ``lines`` is any iterable of strings shaped like ``<record>\\t<count>``
    (an open file works).  Blank lines are skipped.  When the same record
    appears on several lines its counts are added together, so the
    percentages derived from the result always refer to the same total.

    Raises ValueError when a non-blank line has no tab-separated count or
    when the count is not an integer.
    """
    counts = {}
    for number, line in enumerate(lines, 1):
        if not line.strip():
            continue
        fields = line.split("\t")
        if len(fields) < 2:
            raise ValueError(
                "line {0}: expected '<record>\\t<count>', got {1!r}".format(
                    number, line))
        # The record string is hashable, so it keys the dict directly;
        # no digest of it is needed.
        record = fields[0]
        counts[record] = counts.get(record, 0) + int(fields[1])
    return counts


def calculate_percentages(counts):
    """Return ``(percentage, record)`` tuples sorted in ascending order.

    Each percentage is the record's share of the sum of all counts,
    rounded to 4 decimal places.  Returns an empty list when the sum of
    the counts is zero, since no share can be computed.
    """
    total = sum(counts.values())
    if total == 0:
        return []
    # 100.0 forces float division on Python 2 as well.
    return sorted(
        (round(100.0 * count / total, 4), record)
        for record, count in counts.items()
    )


def format_report(lines, threshold=DEFAULT_THRESHOLD):
    """Return the report rows for ``lines``, smallest percentage first.

    Only records whose rounded percentage is at least ``threshold`` are
    included.
    """
    return [
        " {0}% {1}".format(percentage, record)
        for percentage, record in calculate_percentages(read_counts(lines))
        if percentage >= threshold
    ]


def main(path=DEFAULT_INPUT_PATH, threshold=DEFAULT_THRESHOLD):
    """Print the percentage report for the file at ``path``."""
    # The context manager closes the file even if parsing fails.
    with open(path) as source:
        report = format_report(source, threshold)
    for row in report:
        print(row)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default input.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT_PATH)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment