Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python3
"""
Parse a scans.io archive to find specific HTTP responses.

Positive hits are saved to <timestamp>/<ip>.log.

Usage:
    $ parse-scans.io.py https://scans.io/data/rapid7/sonar.http/20151110-http.gz

Requires Python 3.
Latest Ubuntu requirements: apt-get install python3 python3-ujson python3-requests
"""
import base64
import gzip
import os
import sys

try:
    import ujson as json  # ujson parses these large record streams much faster
except ImportError:
    import json  # stdlib json is a drop-in fallback if ujson is not installed

# Byte signatures that identify the HTTP response bodies we are hunting for.
FINGERPRINTS = (
    b'RegExp("[0-9]{13,16}")',
    b'''jQuery('[id*="cc_ss_issue"]').val()''',
    b'querySelectorAll("input, select, textarea, checkbox"',
)


def timestamp_from_url(url):
    """Return the leading timestamp of the archive file name.

    E.g. '.../20151110-http.gz' -> '20151110'. Used as the output directory.
    """
    return url.split('/')[-1].split('-')[0]


def matching_fingerprint(data, fingerprints=FINGERPRINTS):
    """Return the first fingerprint contained in *data*, or None if no match."""
    for fp in fingerprints:
        if fp in data:
            return fp
    return None


def scan(url):
    """Stream the gzipped archive at *url*, saving matching responses to disk.

    Each line of the decompressed archive is a JSON record with an 'ip' key
    and a base64-encoded 'data' payload (the raw HTTP response).
    """
    # Imported lazily so the pure helpers above stay importable/testable
    # without the third-party requests dependency installed.
    import requests

    ts = timestamp_from_url(url)
    os.makedirs(ts, exist_ok=True)

    lines = 0
    resp = requests.get(url, stream=True)
    try:
        # Decompress on the fly from the raw socket stream; the archive is
        # far too large to hold in memory.
        decompressed = gzip.GzipFile(fileobj=resp.raw)
        for line in decompressed:
            entry = json.loads(line.decode())
            decoded = base64.b64decode(entry['data'])
            lines += 1
            if lines % 10000 == 0:
                print("%10d lines" % lines)
            if matching_fingerprint(decoded) is not None:
                filename = ts + '/' + entry['ip'] + '.log'
                print("\t" + filename)
                with open(filename, 'wb') as f:
                    f.write(decoded)
    finally:
        # Always release the streaming connection (was leaked previously).
        resp.close()


def main():
    """Entry point: validate the command-line argument and run the scan."""
    try:
        url = sys.argv[1]
    except IndexError:  # narrowed from a bare except: only a missing argv[1]
        print("Missing URL as argument")
        sys.exit(1)
    scan(url)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment