Created
March 22, 2018 03:02
-
-
Save deeso/8ba611d2a8527e72fb537f548c5e41b6 to your computer and use it in GitHub Desktop.
Download new domains from Whois Newly Registered Domains
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymongo import MongoClient | |
import json, os, time, signal, threading, sys | |
from datetime import datetime, timedelta | |
from gglsbl import SafeBrowsingList | |
import requests | |
from datetime import datetime | |
from datetime import datetime, timedelta | |
from virus_total_apis import PrivateApi, PublicApi | |
import argparse | |
import sys | |
from io import BytesIO | |
from zipfile import ZipFile | |
URLPARSE = None | |
try: | |
from urllib.parse import urlparse as URLPARSE | |
except: | |
pass | |
try: | |
if URLPARSE is None: | |
from urllib2 import urlparse as URLPARSE | |
except: | |
pass | |
try: | |
from io import BytesIO | |
except: | |
pass | |
# Default path of the output file written by do_work().
LINUX_DFT_PATH = '/tmp/domains.txt'
# NOTE(review): the four thresholds below are not referenced by any code
# visible in this file; they are kept in case something else relies on them.
MIN_NEW_DOMS = 5
NUM_ITERS = 10
MIN_DAYS = 5
MIN_HITS = 4
# strftime/strptime pattern shared by the CLI dates and the download URLs.
DFMT = "%Y-%m-%d"
CMD_DESC = 'evaluate related domains for badness.'
# Download URL template; {date} is replaced by a DFMT-formatted date.
NRD_URL_FMT = "https://whoisds.com//whois-database/newly-registered-domains/{date}.zip/nrd"

parser = argparse.ArgumentParser(description=CMD_DESC)
# Numeric options are parsed as int so that values supplied on the command
# line have the same type as their defaults (they feed timedelta() and the
# MongoDB port, both of which require integers).
parser.add_argument('-days', type=int, default=15,
                    help='number of days to download, counting forward from start_date')
# The default is computed once, when this module is loaded.
parser.add_argument('-start_date', type=str, default=datetime.now().strftime(DFMT),
                    help='Date to download: YYYY-MM-DD')
parser.add_argument('-output_file', type=str, default=LINUX_DFT_PATH,
                    help='file to write the downloaded domains to')
parser.add_argument('-mongohost', type=str, default=None,
                    help='mongohost to save domains too')
parser.add_argument('-mongoport', type=int, default=27017,
                    help='mongoport to save domains too')
parser.add_argument('-mongodb', type=str, default='nrd',
                    help='mongo db')
parser.add_argument('-mongocol', type=str, default='new_domains',
                    help='mongo collection')
def create_url(date):
    """Return the download URL for the given date.

    ``date`` must provide ``strftime``; it is rendered with ``DFMT`` and
    substituted for the ``{date}`` placeholder of ``NRD_URL_FMT``.
    """
    return NRD_URL_FMT.format(date=date.strftime(DFMT))
def extract_zip_content(data):
    """Unpack an in-memory zip archive and return its first member's contents.

    Args:
        data: raw bytes of a zip archive.

    Returns:
        A ``(date, domains)`` tuple. ``date`` is the name of the first
        archive member up to its first ``.``; ``domains`` is the list of
        whitespace-separated tokens of that member, decoded as ASCII.

    Raises:
        IndexError: the archive has no members.
        UnicodeDecodeError: the member contains non-ASCII bytes.
        Any error ``ZipFile`` raises for data that is not a valid archive.
    """
    # The context manager closes the archive even if reading fails.
    with ZipFile(BytesIO(data)) as archive:
        name = archive.namelist()[0]
        payload = archive.read(name)
    domains = payload.decode('ascii').split()
    date = name.split('.')[0]
    return date, domains
def download_file_extract(url, timeout=60):
    """Download the zip archive at *url* and extract its domain list.

    This is best effort: any failure is reported on stderr and signalled
    through the return value rather than raised, so one bad day does not
    abort a multi-day run.

    Args:
        url: address of the zip archive to fetch.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot hang the run indefinitely.

    Returns:
        The ``(date, domains)`` tuple produced by ``extract_zip_content``,
        or ``(None, None)`` when the request fails, the status code is not
        200, or the payload cannot be extracted.
    """
    try:
        rsp = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        sys.stderr.write("Download of %s failed: %s\n" % (url, exc))
        return None, None

    if rsp.status_code != 200:
        return None, None

    try:
        return extract_zip_content(rsp.content)
    except Exception as exc:
        # Deliberately broad to keep the best-effort behaviour, but unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        sys.stderr.write("Could not extract %s: %s\n" % (url, exc))
        return None, None
def save_to_mongo(mongohost, mongoport, mongodb, mongocol, data):
    """Insert *data* as a single document into a MongoDB collection.

    Args:
        mongohost: host passed to ``MongoClient``.
        mongoport: port passed to ``MongoClient``; a numeric string (as the
            command line may supply) is converted to int.
        mongodb: name of the database to use.
        mongocol: name of the collection to insert into.
        data: the document handed to ``insert_one``.
    """
    port = int(mongoport) if mongoport is not None else None
    client = MongoClient(mongohost, port)
    try:
        client[mongodb][mongocol].insert_one(data)
    finally:
        # Release the connection even when the insert raises.
        client.close()
def perform_file_downloads(start_date, days, mongohost=None,
                           mongoport=27017, mongodb='nrd',
                           mongocol='new_domains'):
    """Download the domain lists for consecutive days starting at *start_date*.

    Args:
        start_date: first day to fetch, as a string in ``DFMT`` format.
        days: number of consecutive days to attempt; ``0`` is treated as
            ``1``. Numeric strings are accepted and converted to int.
        mongohost: when not ``None``, every successfully downloaded day is
            also stored through ``save_to_mongo``.
        mongoport: MongoDB port forwarded to ``save_to_mongo``.
        mongodb: MongoDB database name forwarded to ``save_to_mongo``.
        mongocol: MongoDB collection name forwarded to ``save_to_mongo``.

    Returns:
        A dict mapping the date string reported by ``download_file_extract``
        to the list of domains for that day. Days whose download failed are
        absent.
    """
    # argparse may deliver the count as a string; the range below needs an int.
    days = int(days)
    if days == 0:
        days = 1

    start = datetime.strptime(start_date, DFMT)
    results = {}
    for offset in range(days):
        target = start + timedelta(days=offset)
        # Stop at the first day that lies in the future.
        if target > datetime.now():
            break
        date, domains = download_file_extract(create_url(target))
        if date is None:
            continue
        results[date] = domains
        if mongohost is not None:
            data = {'date': date, 'domains': domains}
            save_to_mongo(mongohost, mongoport, mongodb, mongocol, data)
    return results
def do_work(start_date, days, output_file=LINUX_DFT_PATH,
            mongohost=None, mongoport=27017, mongodb='nrd',
            mongocol='new_domains'):
    """Download the domain lists and write them to *output_file*.

    The file receives one ``date,domain`` line per domain, grouped by date
    in sorted date order.

    Args:
        start_date: first day to fetch, as a string in ``DFMT`` format.
        days: number of consecutive days to attempt.
        output_file: path of the file to (over)write.
        mongohost, mongoport, mongodb, mongocol: forwarded unchanged to
            ``perform_file_downloads``.

    Returns:
        The dict returned by ``perform_file_downloads``.
    """
    results = perform_file_downloads(start_date, days, mongohost=mongohost,
                                     mongoport=mongoport, mongodb=mongodb,
                                     mongocol=mongocol)
    with open(output_file, 'w') as out:
        for date in sorted(results):
            # Terminate every record with a newline so the last domain of one
            # date and the first domain of the next never share a line.
            out.writelines("%s,%s\n" % (date, domain)
                           for domain in results[date])
    return results
# Script entry point: parse the command line, download the requested range of
# days, and print a summary of what was retrieved.
if __name__ == "__main__":
    args = parser.parse_args()
    start_date = args.start_date
    # Coerce to int: a value given on the command line may arrive as a string,
    # and timedelta(days=...) only accepts numbers.
    days = int(args.days)
    output_file = args.output_file
    mongohost = args.mongohost
    mongoport = int(args.mongoport)
    mongodb = args.mongodb
    mongocol = args.mongocol

    start = datetime.strptime(start_date, DFMT)
    ed = start + timedelta(days=days)
    if ed > datetime.now():
        # The requested range runs into the future: shrink it to the days
        # elapsed since start_date, but always attempt at least one day.
        days = (datetime.now() - start).days
        days = 1 if days <= 0 else days
        ed = datetime.now()

    end_date = ed.strftime(DFMT)
    print("Downloading NRD from %s to %s" % (start_date, end_date))
    r = do_work(start_date, days, output_file,
                mongohost, mongoport, mongodb, mongocol)

    if not r:
        print("No domain lists downloaded")
    else:
        num_domains = sum(len(v) for v in r.values())
        # Report the end date from the (possibly shrunk) day count.
        end_date = (start + timedelta(days=days)).strftime(DFMT)
        print("Found %d domains from %s to %s" % (num_domains, start_date, end_date))
        for k in sorted(r):
            print("Found %d domains on %s" % (len(r[k]), k))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment