#!/usr/bin/env python3 | |
""" | |
Parse a scans.io archive to find specific HTTP responses.
Positive hits are saved to <timestamp>/<ip>.log
Usage:
$ parse-scans.io.py https://scans.io/data/rapid7/sonar.http/20151110-http.gz
Requires Python 3
Requirements on recent Ubuntu: apt-get install python3 python3-ujson python3-requests
""" | |
import requests | |
import sys | |
import gzip | |
import ujson as json | |
import base64 | |
import os | |
# Byte patterns searched for in each decoded HTTP response body.
FINGERPRINTS = (
    b'RegExp("[0-9]{13,16}")',
    b'''jQuery('[id*="cc_ss_issue"]').val()''',
    b'querySelectorAll("input, select, textarea, checkbox"',
)

# A progress line is printed each time this many input lines have been read.
PROGRESS_INTERVAL = 10000


def timestamp_from_url(url):
    """Return the part of the URL's last path segment before the first '-'.

    For '.../20151110-http.gz' this is '20151110'; it is used as the name
    of the output directory.
    """
    return url.split('/')[-1].split('-')[0]


def has_fingerprint(body, fingerprints=FINGERPRINTS):
    """Return True if any of the byte patterns occurs in *body*."""
    return any(fp in body for fp in fingerprints)


def decode_entry(line):
    """Parse one JSON line of the archive into ``(ip, body_bytes)``.

    The line is expected to be a JSON object with an 'ip' field and a
    base64-encoded 'data' field.

    Raises:
        KeyError: a required field is missing.
        TypeError: the record or one of its fields has an unexpected type.
        ValueError: the line is not valid UTF-8 / JSON / base64.
    """
    entry = json.loads(line.decode())
    return str(entry['ip']), base64.b64decode(entry['data'])


def scan(lines, out_dir, fingerprints=FINGERPRINTS):
    """Scan archive lines and save every matching response body.

    Args:
        lines: iterable of raw (bytes) JSON lines.
        out_dir: existing directory that receives the ``<ip>.log`` files.
        fingerprints: byte patterns to look for in each decoded body.

    Returns:
        The number of records that matched and were written to disk.
    """
    hits = 0
    for count, line in enumerate(lines, start=1):
        if count % PROGRESS_INTERVAL == 0:
            print("%10d lines" % count)
        try:
            ip, body = decode_entry(line)
        except (KeyError, TypeError, ValueError) as err:
            # One bad record should not abort a scan of the whole archive.
            print("skipping malformed line %d: %s" % (count, err),
                  file=sys.stderr)
            continue
        if not has_fingerprint(body, fingerprints):
            continue
        # The 'ip' value comes from downloaded data; keep only its final
        # path component so it cannot point outside out_dir.
        filename = out_dir + '/' + os.path.basename(ip) + '.log'
        print("\t" + filename)
        with open(filename, 'wb') as f:
            f.write(body)
        hits += 1
    return hits


def main(argv=None):
    """Download the gzip archive named on the command line and scan it.

    Args:
        argv: argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 when the URL is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Missing URL as argument")
        return 1
    url = argv[1]

    out_dir = timestamp_from_url(url)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)

    with requests.get(url, stream=True) as resp:
        # Fail early on 4xx/5xx instead of feeding an error page to gzip.
        resp.raise_for_status()
        with gzip.GzipFile(fileobj=resp.raw) as decompressed:
            scan(decompressed, out_dir)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment