Skip to content

Instantly share code, notes, and snippets.

@ilanasegall
Created July 9, 2015 20:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ilanasegall/f956aaca9c6bcfa25b59 to your computer and use it in GitHub Desktop.
Save ilanasegall/f956aaca9c6bcfa25b59 to your computer and use it in GitHub Desktop.
search_hardening_mr.py
from collections import defaultdict
import json
import sys
import traceback
from cStringIO import StringIO
import os
import logging
# Warnings from this job are logged to "warning.log" inside $OUTPUTDIR.
# OUTPUTDIR must be set: a missing variable raises KeyError at import time.
# os.path.join supplies the path separator when OUTPUTDIR has no trailing
# slash (plain concatenation would create "<dir>warning.log" next to the
# directory instead of inside it).
logging.basicConfig(filename=os.path.join(os.environ["OUTPUTDIR"], 'warning.log'), level=logging.DEBUG)
# Keep a handle on the builtin map(): the job-level map() defined later in
# this file shadows it.
pymap = map
# True when the BYSESSION environment variable is present; its value is ignored.
BYSESSION = "BYSESSION" in os.environ
# If BYSESSION is true, we only want to care about search data; otherwise
# we are going to crash big time.
def enum_paths(dct, path=None):
    """Yield every root-to-leaf path through a nested mapping.

    Anything exposing an ``items`` attribute is walked as a mapping; any
    other value is treated as a leaf.  Each yielded path is a new list made
    of ``path``, the keys walked to reach the leaf, and finally the leaf
    value itself.  An empty mapping yields nothing.

    Args:
        dct: A (possibly nested) mapping, or a bare leaf value.
        path: Optional list of keys already walked.  It is never modified.

    Yields:
        list: ``path + [key, ..., leaf]`` for each leaf reachable from ``dct``.
    """
    # Copy instead of sharing: a mutable default (or the caller's own list)
    # must not be appended to, otherwise state leaks between calls.
    prefix = [] if path is None else list(path)
    if not hasattr(dct, 'items'):
        yield prefix + [dct]
        return
    for key, value in dct.items():
        for leaf_path in enum_paths(value, prefix + [key]):
            yield leaf_path
def map(k, d, v, cx):
    """Emit one ``(search-settings tuple, 1)`` record for a usable input record.

    ``v`` is parsed as JSON.  When the record has a non-null ``clientID`` and
    its ``environment.settings`` contains ``defaultSearchEngineData``, a tuple
    of (clientID, locale, appUpdateChannel, subsessionStartDate,
    defaultSearchEngine, engine name, loadPath, submissionURL) is written
    with the value ``1``.  ``loadPath`` and ``submissionURL`` fall back to the
    string ``"None"`` when absent.

    Args:
        k: Input key; unused.
        d: Second framework argument; unused.
        v: JSON text of one record.
        cx: Output context exposing ``write(key, value)``.

    Returns:
        None.  Records without a clientID (logged as a warning) or without
        ``defaultSearchEngineData`` are skipped silently.  Any exception is
        reported on stderr and emitted as ``("ERROR:", message)``.
    """
    try:
        record = json.loads(v)
        env = record["environment"]["settings"]
        sysinfo = record["info"]
        # NOTE(review): locale is emitted as parsed, while every other field
        # is utf-8 encoded -- confirm this asymmetry is intended.
        locale = sysinfo["locale"]
        if "clientID" not in record:
            logging.warning("no clientID")
            return
        if record["clientID"] is None:  # apparently this can happen
            logging.warning("clientID is None")
            return
        clientid = record["clientID"].encode('utf-8')
        if "defaultSearchEngineData" not in env:
            return
        sdata = env["defaultSearchEngineData"]
        tup = (
            clientid,
            locale,
            sysinfo["appUpdateChannel"].encode('utf-8'),
            sysinfo["subsessionStartDate"].encode('utf-8'),
            env["defaultSearchEngine"].encode('utf-8'),
            sdata["name"].encode('utf-8'),
            sdata.get("loadPath", "None").encode('utf-8'),
            sdata.get("submissionURL", "None").encode('utf-8'),
        )
        cx.write(tup, 1)
    except Exception as e:
        # Best effort: one bad record must not abort the job, so report it
        # on stderr and emit it under the "ERROR:" key instead.
        sys.stderr.write("ERROR: %s\n" % (e,))
        sys.stderr.write(traceback.format_exc() + "\n")
        cx.write("ERROR:", str(e))
def distn(lst):
    """Return a plain dict mapping each distinct item of ``lst`` to its count.

    Args:
        lst: Iterable of hashable items.

    Returns:
        dict: ``{item: number of occurrences}``; empty for empty input.
    """
    tally = {}
    for item in lst:
        tally[item] = tally.get(item, 0) + 1
    return tally
def reduce(k, v, cx):
    """Combine all values collected for key ``k`` and write the result to ``cx``.

    Error keys are passed through with their messages de-duplicated.  Every
    other key gets a single record whose value is the integer sum of ``v``.

    Args:
        k: Key produced by the map step.
        v: Iterable of values emitted for ``k``.
        cx: Output context exposing ``write(key, value)``.

    Returns:
        None.  Failures while summing are reported on stderr and the key is
        dropped.
    """
    # The map step in this file emits errors under "ERROR:"; the older
    # "JSON PARSE ERROR:" key is still accepted.  Without the first entry,
    # error messages fall through to the integer sum below and are lost.
    if k in ("ERROR:", "JSON PARSE ERROR:"):
        for message in set(v):
            cx.write(k, message)
        return
    try:
        # NOTE(review): k.lower() requires a string key; a tuple key raises
        # here when BYSESSION is set and is dropped by the handler below --
        # confirm that is the intended behaviour.
        if BYSESSION and "search" not in k.lower() and "uptime" not in k.lower():
            return  # too much info right now. we'll run out of memory
        cx.write(k, sum(int(count) for count in v))
    except Exception as e:
        sys.stderr.write("ERROR: %s\n" % (e,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment