# nirinium/stigpy.py

## stigpy.py
import os, sys, lxml, re, ssl, time
from bs4 import BeautifulSoup, SoupStrainer
from urllib.request import urlopen, urlretrieve, urljoin, URLError, HTTPError

def progressDL(count, blockSize, totalSize):
    """Write download progress to stdout on a single, self-overwriting line.

    Has the ``(count, blockSize, totalSize)`` signature of a
    ``urlretrieve`` report hook.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block, in bytes.
        totalSize: Total size of the download in bytes. Zero or negative
            means the size is unknown.
    """
    received = count * blockSize
    if totalSize > 0:
        # The last block is usually only partly filled, so count * blockSize
        # can exceed totalSize; clamp so the display never goes above 100%.
        message = "...%d%% " % min(100, received * 100 // totalSize)
    else:
        # Unknown size: a percentage is meaningless (and dividing by zero
        # would raise), so report the number of bytes received instead.
        message = "...%d bytes " % received
    sys.stdout.write("\r" + message)  # "\r" rewinds so the line is overwritten
    sys.stdout.flush()  # no newline is written, so force the text out now

# NOTE(review): SECURITY - this swaps the ssl module's default HTTPS context
# factory for the unverified one, so every later HTTPS request in this process
# skips certificate validation. Kept as-is because it looks deliberate;
# confirm it is still required before relying on the downloaded files.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This ssl module has no unverified-context factory; keep the default.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

base_url = 'http://iasecontent.disa.mil/'  # prefix for resolving relative download links
URL = 'https://iase.disa.mil/stigs/Pages/a-z.aspx'  # index page listing the STIG files
OUTPUT_DIR = 'stigs'  # directory the downloads are written to

# Parse only the <table> elements of the index page. The response is consumed
# while the soup is built, so it can be closed as soon as parsing is done.
only_table_links = SoupStrainer("table")
with urlopen(URL) as html_page:
    soup = BeautifulSoup(html_page, 'lxml', parse_only=only_table_links)
linksList = []  # hrefs of the STIG links found inside the tables

def url_p_join(href=None, base=None):
    """Return ``href`` as an absolute URL.

    Relative links are resolved against ``base``; links that are already
    absolute are returned unchanged by ``urljoin``.

    Args:
        href: Link to resolve. When omitted, the module-level ``href``
            variable is used, so the zero-argument call form keeps working.
        base: URL that relative links are resolved against. Defaults to the
            module-level ``base_url``.

    Returns:
        The absolute URL as a string.

    Raises:
        KeyError: If ``href`` is omitted and no module-level ``href`` exists.
    """
    if href is None:
        # The parameter shadows the global of the same name, so the
        # module-level value has to be looked up explicitly.
        href = globals()['href']
    if base is None:
        base = base_url
    # urljoin decides by the URL scheme, unlike a first-letter check, which
    # would mistake a relative path such as "hidden/x.zip" for an absolute URL.
    return urljoin(base, href)

# STIG files are recognised by a "u_" / "U_" fragment in the link target.
stig_href = re.compile(r"(u_|U_)")

# Collect the STIG links found in the table-only parse of the index page.
for link in soup.find_all('a', attrs={'href': stig_href}):
    linksList.append(link.get('href'))

print('Total STIGs:', len(linksList))

# Fetch the index page again and parse the whole document this time.
with urlopen(URL) as u:  # PAGE 1 of STIG site
    html = u.read().decode('utf-8')

# urlretrieve does not create directories, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

soup = BeautifulSoup(html, "lxml")
for link in soup.find_all('a', attrs={'href': stig_href}):
    href = link.get('href')
    # Only download links that end with one of the hrefs collected above.
    if not any(href.endswith(x) for x in linksList):
        continue

    # Resolve relative links against the content host; urljoin leaves links
    # that are already absolute untouched.
    href = urljoin(base_url, href)

    filename = os.path.join(OUTPUT_DIR, href.rsplit('/', 1)[-1])
    print("> %s to \\%s..." % (href, OUTPUT_DIR))

    # HTTPError and URLError are subclasses of OSError, so the handlers go
    # from most to least specific; otherwise the OSError clause would catch
    # everything first.
    try:
        urlretrieve(href, filename, reporthook=progressDL)
    except HTTPError as error:
        print(href, 'Error!', error)
    except URLError as error:
        print(error)
    except ValueError as error:
        # Raised for a URL that urllib cannot interpret.
        print(error)
    except OSError as error:
        # Local failure, such as the file not being writable.
        print(error)
    else:
        # The progress line ends without a newline; finish it so the next
        # message starts on a line of its own.
        print('')