Skip to content

Instantly share code, notes, and snippets.

@nuria
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nuria/072a6766440b356364b7 to your computer and use it in GitHub Desktop.
Save nuria/072a6766440b356364b7 to your computer and use it in GitHub Desktop.
Calculates user agent percentages
#!/usr/lib/python
# Read the input file, which holds one user-agent record per line.
# Each line is a JSON record followed by its count, separated by a tab:
# {"browser_major":"1","os_family":"Android","os_major":"1","device_family":"Opus One","browser_family":"Android","os_minor":"5"} 5
# {"browser_major":"1","os_family":"Android","os_major":"4","device_family":"icube 900","browser_family":"Baidu Explorer","os_minor":"2"} 1
# Hash and store the values,
# then loop over the values and
# print the calculated percentages
# that are over a certain threshold.
import md5
import heapq as hp
# Default input: tab-separated "<json record>\t<count>" lines.
INPUT_PATH = './output-tablesample.sql'
# Percentages below this value are left out of the report.
THRESHOLD = 0.0001


def read_counts(lines):
    """Return a dict mapping each record string to its summed count.

    `lines` is any iterable of text lines shaped like
    "<json record>\t<count>".  Blank lines are skipped.  Counts of
    records that appear more than once are added together, so the
    percentages derived from the result always add up to 100.

    Raises ValueError if a non-blank line has no tab-separated count
    or the count is not an integer.
    """
    counts = {}
    for number, line in enumerate(lines, 1):
        if not line.strip():
            # e.g. a trailing empty line at the end of the file
            continue
        fields = line.split("\t")
        if len(fields) < 2:
            raise ValueError(
                "line {0}: expected '<record>\\t<count>', got {1!r}".format(
                    number, line))
        record = fields[0]
        # The record string is used directly as the key: dict keys are
        # hashed already, so no separate digest is needed.
        # int() tolerates the trailing newline left on the count field.
        counts[record] = counts.get(record, 0) + int(fields[1])
    return counts


def percentages(counts):
    """Return (percentage, record) tuples sorted in ascending order.

    Each percentage is the record's share of the sum of all counts,
    rounded to 4 decimal places.  Returns an empty list when the
    total is zero (no records, or all counts are zero).
    """
    total = sum(counts.values())
    if not total:
        return []
    return sorted(
        (round(float(100 * count) / total, 4), record)
        for record, count in counts.items()
    )


def report_lines(rows, threshold=THRESHOLD):
    """Format the rows whose percentage is at least `threshold`."""
    return [
        " {0}% {1}".format(percentage, record)
        for percentage, record in rows
        if percentage >= threshold
    ]


def main(path=INPUT_PATH, threshold=THRESHOLD):
    """Read `path` and print the percentage of each user-agent record."""
    # The with-block closes the file even if parsing fails.
    with open(path) as source:
        counts = read_counts(source)
    for text in report_lines(percentages(counts), threshold):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(text)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment