Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterbe/a33bbf20b77ca55ee07ed0ebce5a2091 to your computer and use it in GitHub Desktop.
import sys
import time

import requests

try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2 fallback
# Super Search endpoint on Mozilla's crash-stats service.
URL = "https://crash-stats.mozilla.com/api/SuperSearch/"
# Base query: crashes since 2016-04-27 with a non-null ipc_channel_error
# field; only the 'uuid' column is requested.
params = {
'ipc_channel_error': '!__null__',
'date': '>=2016-04-27',
'_columns': ['uuid'],
}
# Accumulates every crash UUID fetched across all pages (may contain
# duplicates; deduped later).
crash_ids = []
# Total number of matching crashes, filled in from the first API response.
total = None
def download(auth_token, offset=0):
    """Page through the Super Search API, collecting crash UUIDs.

    Appends every fetched UUID to the module-level ``crash_ids`` list and
    records the reported hit count in the module-level ``total``.

    :param auth_token: crash-stats API token, sent as the Auth-Token header.
    :param offset: result offset to start fetching from (default 0).
    """
    global total
    page_size = 500
    while True:
        query = dict(params, _results_offset=offset, _results_number=page_size)
        url = URL + '?' + urlencode(query, True)  # True => doseq for list values
        print(url)
        results = requests.get(url, headers={
            'Auth-Token': auth_token,
        }).json()
        if total is None:
            # The first response tells us how many hits exist in total.
            total = results['total']
            print("TOTAL %s" % total)
        crash_ids.extend(x['uuid'] for x in results['hits'])
        print("\t#crashes %d" % len(crash_ids))
        print("\toffset %d" % offset)
        offset += page_size
        if offset >= total:
            # Done: the next page would start past the last hit.  (The old
            # recursive version both issued one extra, always-empty request
            # here and risked RecursionError on very large result sets.)
            break
        time.sleep(2)  # be nice to the API between pages
import sys

# Kick off the paginated download; the API token is the first CLI argument.
download(sys.argv[1])
print(len(crash_ids))

# Pages can overlap, so the same crash may appear more than once; dedupe
# before generating SQL. sorted() makes the output file deterministic.
unique_ids = sorted(set(crash_ids))
print("#unique %d" % len(unique_ids))

# NOTE(review): the UUIDs come straight from the API response and are
# interpolated into the SQL below — safe only while they are UUID-shaped.
# Prefer a parameterized INSERT if this ever runs unattended.
sql = "INSERT INTO reprocessing_jobs (crash_id) VALUES "
sql += ',\n'.join("('%s')" % x for x in unique_ids)
sql += ';\n'

# Context manager guarantees the file is flushed and closed (the original
# open(...).write(...) relied on garbage collection to close the handle).
with open('crash_ids_1270211.sql', 'w') as f:
    f.write(sql)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment