Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import json
import logging
import os
import sys
import traceback
from collections import Counter, defaultdict
from cStringIO import StringIO
# Keep a handle on the builtin ``map``; this module defines its own
# function named ``map`` further down, which shadows the builtin.
pymap = map

# BYSESSION is enabled by the mere presence of the variable in the
# environment (its value is ignored).  When it is enabled we only want to
# care about search data; otherwise we are going to crash big time.
BYSESSION = "BYSESSION" in os.environ
def enum_paths(dct, path=None):
    """Yield the key path to every leaf of a nested mapping.

    A leaf is any value that has no ``items`` attribute.  Each yielded path
    is the list of keys that leads from the outermost mapping to one leaf.
    An empty mapping contributes no paths.

    Args:
        dct: The nested mapping to walk, or a leaf value.
        path: Keys already traversed to reach ``dct``.  Defaults to an
            empty path.

    Yields:
        list: One key path per leaf.
    """
    if path is None:
        # A fresh list per call, so a yielded path is never a shared default.
        path = []
    if not hasattr(dct, 'items'):
        yield path
        # Stop here: a leaf has nothing to iterate over.
        return
    # ``items`` is the attribute the guard above checked for.
    for key, value in dct.items():
        for sub_path in enum_paths(value, path + [key]):
            yield sub_path
def map(k, d, v, cx):
    """Emit one count record describing a ping's default search engine.

    ``v`` is parsed as JSON.  When ``environment.settings`` contains
    ``defaultSearchEngineData``, a tuple key of
    ``(clientID, locale, appUpdateChannel, subsessionStartDate,
    defaultSearchEngine, name, loadPath, submissionURL)`` is written with
    the value ``1``.  A missing ``loadPath`` or ``submissionURL`` is
    recorded as the string ``"None"``.

    Any failure (bad JSON, missing field, missing or null ``clientID``) is
    reported on stderr and emitted as an ``("ERROR:", message)`` record
    instead of being raised.

    Args:
        k: Unused.
        d: Unused.
        v: JSON text of one ping.
        cx: Output context exposing ``write(key, value)``.
    """
    try:
        ping = json.loads(v)
        settings = ping["environment"]["settings"]
        info = ping["info"]
        locale = info["locale"]

        # These checks only log.  The lookup/encode right after them raises
        # for a missing or null clientID, and the handler below turns that
        # into an "ERROR:" record.
        if "clientID" not in ping:
            logging.warning("no clientID")
        elif ping["clientID"] is None:  # apparently this can happen
            logging.warning("clientID is None")
        client_id = ping["clientID"].encode('utf-8')

        if "defaultSearchEngineData" in settings:
            engine = settings["defaultSearchEngineData"]
            key = (
                client_id,
                # NOTE(review): locale is the only field that is not UTF-8
                # encoded -- confirm that is intended.
                locale,
                info["appUpdateChannel"].encode('utf-8'),
                info["subsessionStartDate"].encode('utf-8'),
                settings["defaultSearchEngine"].encode('utf-8'),
                engine["name"].encode('utf-8'),
                engine.get("loadPath", "None").encode('utf-8'),
                engine.get("submissionURL", "None").encode('utf-8'),
            )
            cx.write(key, 1)
    except Exception as e:
        # Best effort: one bad ping must not abort the whole job.
        sys.stderr.write("ERROR: %s\n" % (e,))
        sys.stderr.write(traceback.format_exc() + "\n")
        cx.write("ERROR:", str(e))
def distn(lst):
    """Return how many times each distinct item occurs in ``lst``.

    Args:
        lst: Any iterable of hashable items.

    Returns:
        dict: A plain dict mapping each distinct item to its count.
    """
    # iter() makes Counter count the iterated elements even when ``lst`` is
    # a mapping; a mapping passed directly would be read as ready-made counts.
    return dict(Counter(iter(lst)))
def reduce(k, v, cx):
    """Combine all values emitted for one key and write the result.

    For the key ``"JSON PARSE ERROR:"`` each distinct value is written once.
    For every other key the values are converted with ``int`` and their sum
    is written.  When ``BYSESSION`` is set, keys whose lower-cased text
    contains neither ``"search"`` nor ``"uptime"`` are skipped.

    Any failure is reported on stderr and otherwise ignored, so nothing is
    written for that key.

    Args:
        k: The key being reduced.
        v: Iterable of the values emitted for ``k``.
        cx: Output context exposing ``write(key, value)``.
    """
    try:
        if k == "JSON PARSE ERROR:":
            for message in set(v):
                cx.write(k, message)
            # The values are error messages, not counts, so do not fall
            # through to the numeric sum below.
            return
        # NOTE(review): map() in this file writes tuple keys, which have no
        # .lower(); with BYSESSION set such keys raise here and are dropped
        # by the handler below -- confirm that is intended.
        if BYSESSION and "search" not in k.lower() and "uptime" not in k.lower():
            return  # too much info right now. we'll run out of memory
        cx.write(k, sum(int(count) for count in v))
    except Exception as e:
        # Best effort: report the problem and move on to the next key.
        sys.stderr.write("ERROR: %s\n" % (e,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment