-
-
Save neeksor/37023df3d8db6c1905994a40f9654859 to your computer and use it in GitHub Desktop.
Takes a list of domains, prepends www., visits each over http://, and checks the content for flagged words.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
404 Not Found | |
500 Server Error | |
401 Authorization Required | |
temporarily unavailable | |
Welcome to nginx | |
Apache 2 Test Page | |
www.example.com | |
mcc.godaddy.com/park/ | |
search_caf.php |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import with_statement | |
import random | |
from Queue import Queue | |
from threading import Thread | |
import httplib | |
import sys | |
def logger(oq):
    """Drain result strings from ``oq`` and append them to the log files.

    Loops forever, so it is meant to run in a daemon thread. Records whose
    leading field is ``redirect`` are appended to ``redirects.txt``; every
    other non-empty record is appended to ``results.txt``. Each record is
    flushed as soon as it is written.

    :param oq: queue of comma-separated result strings of the form
        ``"<type>,<domain>,..."``; empty/falsy items are ignored.
    """
    with open('results.txt', 'a') as fo, open('redirects.txt', 'a') as fr:
        while True:
            item = oq.get()
            try:
                if item:
                    # Route on the record type (the first comma-separated
                    # field) rather than on a substring match, so that a
                    # domain name or error message that merely contains
                    # the word "redirect" is not filed as a redirect.
                    if item.lower().startswith('redirect,'):
                        out = fr
                    else:
                        out = fo
                    out.write("%s\n" % item)
                    out.flush()
            finally:
                # Mark the item handled so a caller may use oq.join() to
                # wait until everything queued so far has been written.
                oq.task_done()
def worker(oq):
    """Check domains taken from the global queue ``q`` until the process ends.

    Loops forever, so it is meant to run in a daemon thread. For each domain
    it calls ``fetch_url`` with the global ``bad_words`` list and a source
    address chosen at random from the global ``local_ips`` list (port 0),
    and forwards every result other than ``"OK"`` to ``oq``.

    :param oq: output queue that receives the non-OK result strings.
    """
    while True:
        domain = q.get()
        try:
            result = fetch_url(bad_words, (random.choice(local_ips), 0), domain)
            # Compare by value: identity comparison against a string
            # literal only works by accident of interning.
            if result != 'OK':
                oq.put(result)
        except Exception as e:
            # Report the problem in the same "failed,<domain>,<error>" shape
            # fetch_url uses, and keep this thread alive for later domains.
            oq.put("failed,%s,%s" % (domain, e))
        finally:
            # Always acknowledge the item; otherwise q.join() never returns.
            q.task_done()
def fetch_url(bad_words, source_ip, url): | |
nurl = url.replace('http://', '') | |
try: | |
h = httplib.HTTPConnection(nurl, port=80, source_address=source_ip, timeout=10) | |
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} | |
h.request("GET", "/", "", header) | |
resp = h.getresponse() | |
content = resp.read() | |
for word in bad_words: | |
if content.lower().find(word.lower()) > -1: | |
return "MATCHED,%s,%s" % (nurl, word) | |
if 300 <= resp.status <= 399: | |
return "redirect,%s,%s,%s" % (nurl, resp.status, resp.getheader('location')) | |
if resp.status is not 200: | |
return "non200,%s,%s" % (nurl, resp.status) | |
return "OK" | |
except Exception as e: | |
return "failed,%s,%s" % (nurl, e) | |
if __name__ == '__main__':
    # Script entry point: load the flagged phrases, the local source
    # addresses and the domain list, then fan the domains out to a pool of
    # daemon worker threads whose non-OK results are written by one logger
    # thread.
    import time  # only the entry point needs it, so import it locally

    NUM_WORKERS = 1024

    # Keep inner/leading spaces of a phrase, but drop line endings
    # (including a stray '\r' from CRLF files) and blank lines.
    with open("filters.txt", "r") as fil:
        bad_words = [w for w in (line.rstrip('\r\n') for line in fil) if w]
    with open("ips.txt", "r") as fil:
        local_ips = [ip for ip in (line.strip() for line in fil) if ip]
    if not local_ips:
        # Workers pick a source address with random.choice(local_ips),
        # which cannot work on an empty list.
        sys.exit("ips.txt contains no source addresses")

    q = Queue()
    oq = Queue()
    workers = []

    wt = Thread(target=logger, args=(oq,))
    wt.daemon = True
    workers.append(wt)
    wt.start()

    for i in range(NUM_WORKERS):
        t = Thread(target=worker, args=(oq,))
        t.daemon = True
        workers.append(t)
        t.start()

    with open("urls.txt", "r") as f:
        for line in f:
            domain = line.strip()
            if domain:  # skip blank lines instead of queueing an empty host
                q.put(domain)

    q.join()

    # Workers enqueue their result before acknowledging a domain, so once
    # q.join() returns every result is already on oq. The logger is a daemon
    # thread and would be killed at interpreter exit, so wait for it to
    # drain the queue, then allow a short grace period for the entry it is
    # currently writing.
    while not oq.empty():
        time.sleep(0.1)
    time.sleep(0.5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment