@spenkk
Created January 20, 2020 23:22
DoD Scrape: walks the defense.gov A-Z list of military department websites, saving each URL with its description to all.txt and the de-duplicated domains to domains.txt.
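Requires the third-party packages requests, beautifulsoup4, and colorama (pip install requests beautifulsoup4 colorama).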
#!/usr/bin/env python
import os
import string

import colorama
import requests
from bs4 import BeautifulSoup
from colorama import Fore, Style

colorama.init()  # enable ANSI color output on Windows; a no-op elsewhere
def withDescription(items):
    """Print each link with its description and append 'url, "description"' lines to all.txt."""
    for website_name in items:
        web = website_name.contents[-1].strip()
        print(website_name['href'], Fore.GREEN + '"' + web + '"' + Style.RESET_ALL)
        with open('all.txt', 'a') as out:
            out.write(website_name['href'] + ', "' + web + '"\n')
def onlyDomains():
    """Reduce each line of all.txt to its bare domain and write the unique set to domains.txt."""
    with open('all.txt') as lines:
        for i in lines:
            url = i.split(',')[0]
            # Strip the scheme and any path: 'https://host/path' -> 'host'.
            domain = url.split('//')[-1].split('/')[0]
            with open('domains-temp.txt', 'a') as temp:
                temp.write(domain + '\n')
    print(Fore.RED + '[*] Removing duplicate domains.' + Style.RESET_ALL)
    lines_seen = set()
    with open('domains.txt', 'w') as outfile:
        for line in open('domains-temp.txt'):
            # Keep only the first occurrence of each domain.
            if line not in lines_seen:
                outfile.write(line)
                lines_seen.add(line)
    os.remove('domains-temp.txt')
# The A-Z list is paginated by first letter, plus a '0-9' page for names that start with a digit.
alphabet = ['0-9'] + list(string.ascii_uppercase)
for char in alphabet:
    page = requests.get('https://www.defense.gov/Resources/Military-Departments/A-Z-List/?page={}'.format(char))
    soup = BeautifulSoup(page.text, 'html.parser')
    # Each page lists its sites inside an element with the DGOVWebsitesLinks class.
    website_table = soup.find(class_='DGOVWebsitesLinks')
    website_table_items = website_table.find_all('a', href=True)
    withDescription(website_table_items)

print(Fore.RED + '[*] Saving only domains to domains.txt' + Style.RESET_ALL)
onlyDomains()
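The split-based domain extraction in onlyDomains() works for the simple http(s) URLs this page serves, but as a minimal sketch, the standard library's urllib.parse.urlsplit expresses the same step more robustly (extract_domain is a hypothetical helper, not part of the gist):

# Minimal sketch of the same domain extraction using the standard library.
from urllib.parse import urlsplit

def extract_domain(url):
    # netloc is the host (and port, if present) component of the URL;
    # urlsplit also copes with query strings and missing paths.
    return urlsplit(url.strip()).netloc

print(extract_domain('https://www.defense.gov/Resources/'))  # -> www.defense.gov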