recon_info_netcraft.py - Script to download subdomains for a given domain from netcraft.com
#!/usr/bin/python3
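### Example usage (assumes this script is saved as recon_info_netcraft.py and
### the 'requests', 'bs4' and 'lxml' packages are installed):
###     python3 recon_info_netcraft.py --domain example.com --outputfile /tmp/out10.txt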
import requests
from bs4 import BeautifulSoup
from sys import exit
from argparse import ArgumentParser
### URL to searchdns.netcraft.com to get all the domains
NETCRAFT_DOMAIN_NAME = "http://searchdns.netcraft.com"
NETCRAFT_REQUEST_URL = "/?restriction=site+contains&host="
### Write local output from a command to this file ###
DEFAULT_OUTPUT_FILE = "/tmp/out10.txt"
### User Agent String for web requests
USER_AGENT_STRING = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
                     "AppleWebKit/537.36 (KHTML, like Gecko) "
                     "Chrome/60.0.3112.113 Safari/537.36")
### Description of the script
DEFINITION = "Downloads subdomains for a given domain from searchdns.netcraft.com"


def write_output_to_file(outputfile, output):
    """ Write the collected output to the given file """
    with open(outputfile, "wb+") as f:
        f.write(output.encode("utf-8"))


def make_get_request_via_browser(url, browser=None):
    """ Make request for a URL using existing browser, or create a new browser tab """
    ### Imported here since selenium is only required if this helper is used
    from selenium import webdriver
    if browser:
        browser.get(url)
    else:
        browser = webdriver.Firefox()
        browser.get(url)
    return browser


def make_get_request(url):
    """ Make GET request to a URL """
    headers = {"User-Agent": USER_AGENT_STRING}
    resp = requests.get(url, headers=headers)
    return resp


def main():
    parser = ArgumentParser(description=DEFINITION)

    ### Define the arguments that the script takes
    parser.add_argument("--domain", dest="domain", action="store", required=True)
    parser.add_argument("--outputfile", dest="outputfile", action="store",
                        required=False)

    ### Read the arguments
    args = parser.parse_args()
    config = vars(args)

    ### All output from parsed pages is accumulated here
    output = ""

    domain = config["domain"]
    num_pages_parsed = 0
    all_pages_parsed = False
    all_domains = set()
    url = NETCRAFT_DOMAIN_NAME + NETCRAFT_REQUEST_URL + domain
    while not all_pages_parsed:
        print("[i] Making request to url {} to get all domains".format(url))
        resp = make_get_request(url)
        print("[i] Response length: {}".format(len(resp.text)))
        bs = BeautifulSoup(resp.text, "lxml")

        print("[i] Parsing page {}".format(num_pages_parsed + 1))

        ### bs.find returns None (it does not raise) when there is no match,
        ### so test the result explicitly rather than via try/except
        print("[i] Getting results table")
        results_table = bs.find(attrs={"class": "TBtable"})
        if not results_table:
            print("[-] Results table not found.")
            exit(1)

        print("[i] Parsing results table (excluding header row)")
        result_rows = results_table.findAll("tr")[1:]
        if not result_rows:
            print("[-] result_rows not found")
            exit(1)

        print("[i] Parsing url from each table row")
        try:
            ### Second cell of each row holds the link with the site's hostname
            get_url = lambda row: row.findAll("td")[1].find("a").text
            domains_on_this_page = [get_url(r) for r in result_rows]
        except Exception as e:
            print("[-] Cannot parse domain")
            print("[-] Error: {}".format(e))
            exit(1)

        print("[+] Number of domains parsed from the page: {}".format(
            len(domains_on_this_page)))

        ### Display the domains discovered
        output = "\n".join(domains_on_this_page)
        print("[+] Domains discovered on this page:\n{}\n".format(output))

        ### Add domains on this page to the full set of domains
        all_domains.update(domains_on_this_page)

        ### Is there another page?
        all_urls_on_page = bs.findAll("a")
        next_page_url_elem = [elem for elem in all_urls_on_page
                              if "Next page" in elem.text]
        if next_page_url_elem:
            ### Get the next page
            next_page_url = next_page_url_elem[0].get("href")
            url = NETCRAFT_DOMAIN_NAME + next_page_url
            num_pages_parsed += 1
        else:
            ### No next page, all URLs have been parsed
            all_pages_parsed = True

    ### Finally, write the output to the specified file, if defined
    output = "\n".join(all_domains)
    print("[+] Total number of domains discovered: {}".format(len(all_domains)))
    print("[+] Total domains discovered:\n{}\n".format(output))
    if config.get("outputfile"):
        write_output_to_file(config["outputfile"], output)


if __name__ == "__main__":
    main()