Skip to content

Instantly share code, notes, and snippets.

@nirinium
Last active March 23, 2018 18:21
Show Gist options
  • Save nirinium/1898c86fcde3a8b6ae30e371bb98f9d6 to your computer and use it in GitHub Desktop.
Save nirinium/1898c86fcde3a8b6ae30e371bb98f9d6 to your computer and use it in GitHub Desktop.
import os, sys, lxml, re, ssl, time
from bs4 import BeautifulSoup, SoupStrainer
from urllib.request import urlopen, urlretrieve, urljoin, URLError, HTTPError
def progressDL(count, blockSize, totalSize):
    """Show download progress on a single, self-overwriting console line.

    The signature matches the ``reporthook`` callback accepted by
    ``urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block, in bytes.
        totalSize: Total size of the download in bytes. Zero or negative
            means the size is unknown, in which case the number of bytes
            received is shown instead of a percentage.
    """
    received = count * blockSize
    if totalSize > 0:
        # The last block is usually only partly filled, so count * blockSize
        # can overshoot the real size; never report more than 100%.
        percent = min(100, received * 100 // totalSize)
        status = "...%d%% " % percent
    else:
        # Unknown size: a percentage cannot be computed (and dividing by
        # zero would raise), so fall back to a byte counter.
        status = "...%d bytes " % received
    # "\r" returns to the start of the line so each update overwrites the last.
    sys.stdout.write("\r" + status)
    sys.stdout.flush()
# Replace the default HTTPS context factory with the unverified one, which
# turns off certificate verification for every urlopen/urlretrieve call in
# this process.
# NOTE(review): this is a security trade-off -- downloads are no longer
# protected against man-in-the-middle tampering. Presumably done because the
# server's certificate chain is not in the default trust store; confirm, and
# prefer supplying the proper CA bundle if possible.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This Python build has no such private hook; keep the default context.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Host that relative download links are resolved against.
base_url = 'http://iasecontent.disa.mil/'
# Index page that lists the STIG downloads.
URL = 'https://iase.disa.mil/stigs/Pages/a-z.aspx'
# Local directory the downloaded files are written to.
OUTPUT_DIR = 'stigs'

# Parse only the <table> elements of the index page.
only_table_links = SoupStrainer("table")
# The response is parsed inside the ``with`` block so the HTTP connection is
# closed as soon as the document has been read.
with urlopen(URL) as html_page:
    soup = BeautifulSoup(html_page, 'lxml', parse_only=only_table_links)

# Filled later with the href of every matching link found in ``soup``.
linksList = []
def url_p_join(href=None, base=None):
    """Return *href* as an absolute URL, resolving it against *base*.

    Args:
        href: Link target taken from the page. May be absolute or relative.
        base: URL that relative links are resolved against. Defaults to the
            module-level ``base_url``.

    Returns:
        The absolute URL. An *href* that is already absolute is returned
        unchanged.

    Raises:
        ValueError: If *href* is not supplied.
    """
    if href is None:
        raise ValueError('url_p_join() needs the href to resolve')
    if base is None:
        base = base_url
    # urljoin leaves absolute URLs alone, so there is no need to guess from
    # the first character (a relative path such as "help/x.zip" also starts
    # with "h").
    joined = urljoin(base, href)
    if joined != href:
        print('join text')
    return joined
# Collect the href of every link inside the page's tables whose URL contains
# "u_" or "U_".
linksList.extend(
    link.get('href')
    for link in soup.find_all('a', attrs={'href': re.compile("(u_|U_)")})
)
print('Total STIGs:', len(linksList))

# Fetch the page again and parse it in full; the ``with`` block closes the
# response even if reading or decoding fails.
with urlopen(URL) as u:  # PAGE 1 of STIG site
    html = u.read().decode('utf-8')
soup = BeautifulSoup(html, "lxml")

# urlretrieve does not create missing directories; without this every
# download fails with an OSError.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# str.endswith accepts a tuple, so build the suffix set once instead of
# re-scanning linksList for every link.
wanted_suffixes = tuple(linksList)
# Hrefs that were relative on the page and had to be resolved.
relativeurls = []

for link in soup.find_all('a', attrs={'href': re.compile("(u_|U_)")}):
    href = link.get('href')
    if not href.endswith(wanted_suffixes):  # pulls links from [LIST] linksList
        continue

    # Resolve relative links against base_url so urlretrieve always receives
    # an absolute URL; absolute links come back from urljoin unchanged.
    absolute = urljoin(base_url, href)
    if absolute != href:
        relativeurls.append(href)
        href = absolute

    filename = os.path.join(OUTPUT_DIR, href.rsplit('/', 1)[-1])
    print("> %s to \\%s..." % (href, OUTPUT_DIR))
    try:
        urlretrieve(href, filename, reporthook=progressDL)
    # Most specific handlers first: HTTPError is a URLError, and URLError is
    # an OSError, so the reverse order would make the first two unreachable.
    except HTTPError as error:
        print(error.filename, 'Error!')
    except URLError as error:
        print(error)
    except ValueError as error:
        # Raised for a URL that urlretrieve cannot interpret.
        print(error)
    except OSError as error:
        # Local file-system or socket failure; report it and move on to the
        # next link rather than aborting the whole run.
        print(error)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment