Skip to content

Instantly share code, notes, and snippets.

@michellesun
Created September 5, 2012 16:02
Show Gist options
  • Save michellesun/3639040 to your computer and use it in GitHub Desktop.
Save michellesun/3639040 to your computer and use it in GitHub Desktop.
Hello Challenge Part 2
import json
import bsddb3 as bsddb
import re
import collections
# part 1
# get top ten devices
def findtopten(db):
    """Return the ten most common device names among the users in ``db``.

    Every value in ``db`` is split on ``","`` and its second field is taken
    as the device name (e.g. ``"ios,iphone"`` -> ``"iphone"``).

    Args:
        db: Mapping-like object that supports ``keys()`` and item lookup and
            whose values are comma-separated strings.

    Returns:
        A list of at most ten ``(device_name, count)`` tuples, ordered from
        the most to the least common device. Ties are broken alphabetically
        by device name so the result is deterministic.

    Raises:
        IndexError: If a value has no second comma-separated field.
    """
    device = collections.Counter(
        db[key].split(",")[1] for key in db.keys()
    )
    # Rank by descending count; sorting the raw (name, count) pairs would
    # order the devices alphabetically instead of by popularity.
    ranked = sorted(device.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:10]
# part 2
# report the percentage of matches that are
# cross-platform (Android to iOS or vice versa).
def crossplatform(db, filename, total=1091608):
    """Return the percentage of matches that are cross-platform.

    Each line of ``filename`` must contain the separator ``" MATCH "``
    followed by a JSON object with the keys ``"userid1"`` and ``"userid2"``.
    Both user ids are looked up in ``db``; the first comma-separated field of
    each record is taken as that user's platform. A match counts as
    cross-platform when the two platforms differ.

    Args:
        db: Mapping-like object from user id to a ``"platform,device"``
            string.
        filename: Path of the matches log.
        total: Number of matches used as the denominator. The default is the
            line count of the author's ``matches.txt`` (obtained with
            ``wc -l matches.txt``); pass the real count for any other file.

    Returns:
        The cross-platform share as a float percentage in ``[0, 100]``
        (provided ``total`` is the true number of matches).

    Raises:
        IndexError: If a line does not contain ``" MATCH "``.
        KeyError: If a user id is missing from ``db`` or from the JSON.
    """
    cross_count = 0
    # The context manager closes the log even if a line fails to parse.
    with open(filename) as f:
        for line in f:
            matchpair = line.strip().split(" MATCH ")[1]
            match = json.loads(matchpair)
            # Compare whole platform names, not just the first character.
            platform1 = db[match["userid1"]].split(",")[0]
            platform2 = db[match["userid2"]].split(",")[0]
            # Cross-platform means the two sides are on DIFFERENT platforms.
            if platform1 != platform2:
                cross_count += 1
    percent_crossplatform = cross_count / float(total) * 100
    return percent_crossplatform
# Part 3: what are the top 3 busiest hours for bumping (matched ones)
def getbumplist(filename):
    """Collect the hour field of every match line in ``filename``.

    For each line, the text before the ``" MATCH "`` separator is split into
    ``\\w+`` tokens and the fourth token is kept.

    Args:
        filename: Path of the matches log.

    Returns:
        A list of hour strings (e.g. ``'00'``), one per line, in file order.

    Raises:
        IndexError: If the text before ``" MATCH "`` has fewer than four
            ``\\w+`` tokens.
    """
    hourlist = []
    # Open the file once and let the context manager close it; the original
    # opened it twice and never closed either handle.
    with open(filename, "r") as f:
        for line in f:
            date = line.strip().split(" MATCH ")[0]
            # Only keep the hour, e.g. '00'. Presumably the tokens are
            # year, month, day, hour -- confirm against the log format.
            hour = re.findall(r'\w+', date)[3]
            hourlist.append(hour)
    return hourlist
def topthreehours(hourlist):
    """Return the three most frequent entries of ``hourlist``.

    Args:
        hourlist: Iterable of hashable hour values.

    Returns:
        A list of up to three ``(hour, occurrences)`` tuples, most frequent
        first, as produced by ``collections.Counter.most_common``.
    """
    # Tally every hour, then let Counter pick the three largest tallies.
    return collections.Counter(hourlist).most_common(3)
def main():
    """Print the three reports: top devices, cross-platform share, busy hours.

    Reads the user database ``users.db`` and the match log ``matches.txt``
    from the current working directory and writes the results to stdout.
    """
    db = bsddb.btopen("users.db")
    # Records look like:
    # { "1293483219": "ios,iphone", "12094389": "android,htcxyz"}
    try:
        # part 1: prints the top ten devices among our users
        topten = findtopten(db)
        print("Top ten devices for bump users are")
        # Emit the names as one complete line; trailing-comma prints left the
        # line unterminated so the next report ran into it.
        print(" ".join(str(pair[0]) for pair in topten))

        # part 2: percentage of matches that are cross-platform
        filename = 'matches.txt'
        print("Percentage of crossplatform matches is %s"
              % crossplatform(db, filename))

        # part 3: top 3 busiest hours for bumping
        hourlist = getbumplist(filename)
        topthreehrs = topthreehours(hourlist)
        print("Top 3 busiest hours for users to successfully bump is: ")
        print(" ".join(str(item[0]) for item in topthreehrs))
    finally:
        # Always release the database handle, even if a report fails.
        db.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment