Created
October 1, 2017 02:36
-
-
Save manasmbellani/d2ef11a854d698779ab7767f99d55f08 to your computer and use it in GitHub Desktop.
recon_info_netcraft.py - Script to download subdomains for a given domain from netcraft.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import subprocess | |
import shlex | |
import requests | |
from bs4 import BeautifulSoup | |
from sys import exit | |
from argparse import ArgumentParser, RawTextHelpFormatter | |
### Base URL of searchdns.netcraft.com, used to enumerate subdomains
NETCRAFT_DOMAIN_NAME = "http://searchdns.netcraft.com"
### Query path; the target domain is appended to the "host=" parameter
NETCRAFT_REQUEST_URL = "/?restriction=site+contains&host="

### Write local output from a command to this file ###
DEFAULT_OUTPUT_FILE = "/tmp/out10.txt"

### User Agent String for web requests (browser-like UA to avoid trivial blocking)
USER_AGENT_STRING = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
                     "AppleWebKit/537.36 (KHTML, like Gecko) "
                     "Chrome/60.0.3112.113 Safari/537.36")

### Description shown in the argparse --help output.
### Fixed: the original text ("Runs crtsh to get the domains for the
### certificates") was a copy-paste leftover from a crt.sh template and
### did not describe what this script actually does.
DEFINITION = "Downloads subdomains for a given domain from searchdns.netcraft.com"
def write_output_to_file(outputfile, output):
    """Write *output* to *outputfile* as UTF-8 bytes, truncating any existing file."""
    encoded = output.encode("utf-8")
    with open(outputfile, "wb+") as out_fh:
        out_fh.write(encoded)
def make_get_request_via_browser(url, browser=None):
    """Fetch *url* through a Selenium browser.

    If *browser* is supplied, reuse it; otherwise launch a new Firefox
    instance. The (possibly newly created) browser is returned so the
    caller can reuse it for subsequent requests.

    NOTE(review): `webdriver` is never imported anywhere in this file
    (there is no `from selenium import webdriver`), so the
    `browser is None` path raises NameError as written. Confirm whether
    this helper is dead code or add the selenium import before using it.
    """
    if browser:
        browser.get(url)
    else:
        # Requires selenium's webdriver in scope -- currently undefined here.
        browser = webdriver.Firefox()
        browser.get(url)
    return browser
def make_get_request(url, timeout=30):
    """Issue an HTTP GET request to *url* with a browser-like User-Agent.

    Args:
        url: Fully-qualified URL to fetch.
        timeout: Seconds to wait before aborting the request. New,
            backward-compatible parameter -- the original call had no
            timeout, and `requests.get` never times out by default, so a
            stalled server would hang the script indefinitely.

    Returns:
        The `requests.Response` object for the completed request.
    """
    headers = {"User-Agent": USER_AGENT_STRING}
    resp = requests.get(url, headers=headers, timeout=timeout)
    return resp
def _extract_domains_from_page(bs):
    """Return the domain names listed in one searchdns.netcraft.com
    results page, or None when the page contains no results table.

    The domain is the anchor text in the second <td> of each row of the
    table with class "TBtable"; the first row is a header and is skipped.
    Exits the script with status 1 on a parse failure, matching the
    original error handling.

    Note: the original wrapped `bs.find` in try/except, but `find`
    returns None for a missing element rather than raising, so that
    error branch was unreachable and has been removed.
    """
    results_table = bs.find(attrs={"class": "TBtable"})
    if not results_table:
        return None
    print("[i] Parsing results table (excluding header row)")
    try:
        result_rows = results_table.findAll("tr")[1:]
    except Exception as e:
        print("[-] result_rows not found")
        print("[-] Error: {}".format(e))
        exit(1)
    print("[i] Parsing url from each table row")
    try:
        return [row.findAll("td")[1].find("a").text for row in result_rows]
    except Exception as e:
        print("[-] Cannot parse domain")
        print("[-] Error: {}".format(e))
        exit(1)


def _next_page_url(bs):
    """Return the href of the "Next page" pagination link, or None on
    the last results page."""
    next_links = [a for a in bs.findAll("a") if "Next page" in a.text]
    return next_links[0].get("href") if next_links else None


def main():
    """Enumerate subdomains for --domain via searchdns.netcraft.com,
    following pagination, and optionally write the results to
    --outputfile."""
    parser = ArgumentParser(description=DEFINITION)
    parser.add_argument("--domain", dest="domain", action="store", required=True)
    parser.add_argument("--outputfile", dest="outputfile", action="store", required=False)
    args = parser.parse_args()
    config = vars(args)

    domain = config["domain"]
    num_pages_parsed = 0
    all_pages_parsed = False
    all_domains = set()  # de-duplicates domains seen across pages
    url = NETCRAFT_DOMAIN_NAME + NETCRAFT_REQUEST_URL + "{}".format(domain)

    while not all_pages_parsed:
        print("[i] Making request to url {} to get all domains".format(url))
        resp = make_get_request(url)
        print("[i] Response length: {}".format(len(resp.text)))
        bs = BeautifulSoup(resp.text, "lxml")
        print("[i] Parsing page {}".format(num_pages_parsed + 1))
        print("[i] Getting results table")

        domains_on_this_page = _extract_domains_from_page(bs)
        if domains_on_this_page is not None:
            print("[+] Number of domains parsed from the page: {}".format(
                len(domains_on_this_page)))
            page_output = "\n".join(domains_on_this_page)
            print("[+] Domains discovered on this page:\n{}\n".format(page_output))
            all_domains.update(domains_on_this_page)

        # Follow pagination until there is no "Next page" link.
        next_href = _next_page_url(bs)
        if next_href:
            url = NETCRAFT_DOMAIN_NAME + next_href
            num_pages_parsed += 1
        else:
            all_pages_parsed = True

    output = "\n".join(all_domains)
    print("[+] Total number of domains discovered: {}".format(len(all_domains)))
    print("[+] Total domains discovered:\n{}\n".format(output))
    if config.get("outputfile"):
        write_output_to_file(config["outputfile"], output)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment