Skip to content

Instantly share code, notes, and snippets.

@deeso
Created March 22, 2018 03:02
Show Gist options
  • Save deeso/8ba611d2a8527e72fb537f548c5e41b6 to your computer and use it in GitHub Desktop.
Save deeso/8ba611d2a8527e72fb537f548c5e41b6 to your computer and use it in GitHub Desktop.
Download newly registered domain lists from the WhoisDS Newly Registered Domains (NRD) feed.
from pymongo import MongoClient
import json, os, time, signal, threading, sys
from datetime import datetime, timedelta
from gglsbl import SafeBrowsingList
import requests
from datetime import datetime
from datetime import datetime, timedelta
from virus_total_apis import PrivateApi, PublicApi
import argparse
import sys
from io import BytesIO
from zipfile import ZipFile
URLPARSE = None
try:
from urllib.parse import urlparse as URLPARSE
except:
pass
try:
if URLPARSE is None:
from urllib2 import urlparse as URLPARSE
except:
pass
try:
from io import BytesIO
except:
pass
# Default path of the "date,domain" output file.
LINUX_DFT_PATH = '/tmp/domains.txt'
# NOTE(review): the four thresholds below are not referenced anywhere in the
# visible part of this file; kept in case other code imports them.
MIN_NEW_DOMS = 5
NUM_ITERS = 10
MIN_DAYS = 5
MIN_HITS = 4
# strftime/strptime pattern shared by the CLI dates and the download URL.
DFMT = "%Y-%m-%d"
CMD_DESC = 'evaluate related domains for badness.'
# "{date}" is replaced by a DFMT-formatted day.
NRD_URL_FMT = "https://whoisds.com//whois-database/newly-registered-domains/{date}.zip/nrd"

# Command-line interface.  Numeric options are declared as int so that values
# given on the command line have the same type as their defaults; as strings
# they break timedelta(days=...) and MongoClient(host, port).
parser = argparse.ArgumentParser(description=CMD_DESC)
parser.add_argument('-days', type=int, default=15,
                    help='number of days to download, counting forward '
                         'from start_date')
parser.add_argument('-start_date', type=str,
                    default=datetime.now().strftime(DFMT),
                    help='Date to download: YYYY-MM-DD')
parser.add_argument('-output_file', type=str, default=LINUX_DFT_PATH,
                    help='file to write the downloaded domains to')
parser.add_argument('-mongohost', type=str, default=None,
                    help='mongohost to save domains to')
parser.add_argument('-mongoport', type=int, default=27017,
                    help='mongoport to save domains to')
parser.add_argument('-mongodb', type=str, default='nrd',
                    help='mongo db')
parser.add_argument('-mongocol', type=str, default='new_domains',
                    help='mongo collection')
def create_url(date):
    """Build the download URL of the domain list for ``date``.

    ``date`` is any object offering ``strftime`` (for example a
    ``datetime``).  It is rendered with ``DFMT`` and substituted for the
    ``{date}`` placeholder of ``NRD_URL_FMT``.
    """
    return NRD_URL_FMT.format(date=date.strftime(DFMT))
def extract_zip_content(data):
    """Unpack an in-memory zip archive and return its first member as domains.

    Args:
        data: raw bytes of a zip archive.

    Returns:
        A ``(date, domains)`` tuple.  ``date`` is the first member's file
        name up to its first ``.``; ``domains`` is that member's content
        decoded as ASCII and split on whitespace.

    Raises:
        IndexError: the archive contains no members.
        UnicodeDecodeError: the member is not pure ASCII.
        Any error ``ZipFile`` raises for a malformed archive.
    """
    # The context manager closes the archive even when reading fails.
    with ZipFile(BytesIO(data)) as archive:
        name = archive.namelist()[0]
        text = archive.read(name).decode('ascii')
    return name.split('.')[0], text.split()
def download_file_extract(url, timeout=60):
    """Download the zip archive at ``url`` and extract its domain list.

    Args:
        url: location of the zip archive.
        timeout: seconds handed to ``requests.get``; without a timeout a
            stalled server would block the run indefinitely.

    Returns:
        The ``(date, domains)`` tuple produced by ``extract_zip_content``,
        or ``(None, None)`` when the request fails, the response status is
        not 200, or the payload cannot be unpacked.
    """
    try:
        rsp = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # A transport failure for one URL should not abort the other days.
        sys.stderr.write("Download failed for %s: %s\n" % (url, err))
        return None, None

    if rsp.status_code != 200:
        return None, None

    try:
        return extract_zip_content(rsp.content)
    except Exception as err:
        # Best effort: an unusable payload means "no data for this URL".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        sys.stderr.write("Could not unpack %s: %s\n" % (url, err))
        return None, None
def save_to_mongo(mongohost, mongoport, mongodb, mongocol, data):
    """Insert ``data`` as a single document into ``mongodb``.``mongocol``.

    Args:
        mongohost: MongoDB host name or address.
        mongoport: MongoDB port; an int or a numeric string.
        mongodb: database name.
        mongocol: collection name.
        data: the document (a dict) to insert.

    Raises:
        ValueError: ``mongoport`` is not numeric.
        Any error raised by the MongoDB driver during the insert.
    """
    # The command-line parser in this file declares -mongoport as a string,
    # while MongoClient only accepts an integer port.
    client = MongoClient(mongohost, int(mongoport))
    try:
        client[mongodb][mongocol].insert_one(data)
    finally:
        # Release the connection even when the insert fails.
        client.close()
def perform_file_downloads(start_date, days, mongohost=None,
                           mongoport=27017, mongodb='nrd',
                           mongocol='new_domains'):
    """Download the domain lists for ``days`` consecutive days.

    Starts at ``start_date`` and walks forward one day at a time, stopping
    early once the next day would lie in the future.

    Args:
        start_date: first day to download, formatted as ``DFMT``.
        days: number of days to attempt; ``0`` is treated as ``1``.  An int
            or a numeric string.
        mongohost: when not ``None``, every downloaded list is also stored
            through ``save_to_mongo``.
        mongoport, mongodb, mongocol: passed on to ``save_to_mongo``.

    Returns:
        A dict mapping the date reported by each downloaded archive to its
        list of domains.  Days whose download failed are absent.
    """
    # Values read from the command line may arrive as strings.
    days = int(days)
    if days == 0:
        days = 1

    start = datetime.strptime(start_date, DFMT)
    results = {}
    for offset in range(days):
        target = start + timedelta(days=offset)
        if target > datetime.now():
            break
        date, domains = download_file_extract(create_url(target))
        if date is None:
            continue
        results[date] = domains
        if mongohost is not None:
            save_to_mongo(mongohost, mongoport, mongodb, mongocol,
                          {'date': date, 'domains': domains})
    return results
def do_work(start_date, days, output_file=LINUX_DFT_PATH,
            mongohost=None, mongoport=27017, mongodb='nrd',
            mongocol='new_domains'):
    """Download the domain lists and write them to ``output_file``.

    The file receives one ``date,domain`` line per domain, ordered by date.

    Args:
        start_date: first day to download, formatted as ``DFMT``.
        days: number of days to attempt.
        output_file: path of the file to (over)write.
        mongohost, mongoport, mongodb, mongocol: passed on to
            ``perform_file_downloads``.

    Returns:
        The dict returned by ``perform_file_downloads``.
    """
    results = perform_file_downloads(start_date, days, mongohost=mongohost,
                                     mongoport=mongoport, mongodb=mongodb,
                                     mongocol=mongocol)
    with open(output_file, 'w') as out:
        for date in sorted(results):
            # Terminate every record with a newline; joining each date's
            # records with '\n' alone glued the last record of one date to
            # the first record of the next.
            out.writelines("%s,%s\n" % (date, domain)
                           for domain in results[date])
    return results
if __name__ == "__main__":
    # Script entry point: parse the options, download, print a summary.
    args = parser.parse_args()
    start_date = args.start_date
    # int() keeps the date arithmetic and the Mongo connection working even
    # when the parser hands these options over as strings.
    days = int(args.days)
    output_file = args.output_file
    mongohost = args.mongohost
    mongoport = int(args.mongoport)
    mongodb = args.mongodb
    mongocol = args.mongocol

    start = datetime.strptime(start_date, DFMT)
    now = datetime.now()
    ed = start + timedelta(days=days)
    if ed > now:
        # Clamp the range to today, but always attempt at least one day.
        days = max((now - start).days, 1)
        ed = now
    # Computed once so the banner and the summary report the same range.
    end_date = ed.strftime(DFMT)

    print("Downloading NRD from %s to %s" % (start_date, end_date))
    r = do_work(start_date, days, output_file,
                mongohost, mongoport, mongodb, mongocol)
    if not r:
        print("No domain lists downloaded")
    else:
        num_domains = sum(len(v) for v in r.values())
        print("Found %d domains from %s to %s" % (num_domains, start_date, end_date))
        for k in sorted(r):
            print("Found %d domains on %s" % (len(r[k]), k))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment