Skip to content

Instantly share code, notes, and snippets.

@return0927
Created September 26, 2017 12:04
Show Gist options
  • Save return0927/474f63500c9c9452a2f6aa865a923ac5 to your computer and use it in GitHub Desktop.
Save return0927/474f63500c9c9452a2f6aa865a923ac5 to your computer and use it in GitHub Desktop.
# Fetch a Microsoft security advisory page and print the text of its main
# body, split into sections. A new section starts at every matched element
# whose class list is exactly ["subheading"]; anything that appears before
# the first such element is not printed.
import requests
import bs4

_URL = "https://technet.microsoft.com/library/security/4010983"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops on an HTTP error instead of parsing an error page.
reqBody = requests.get(_URL, timeout=30)
reqBody.raise_for_status()

soup = bs4.BeautifulSoup(reqBody.text, "html.parser")
_BODY = soup.find("div", attrs={"id": "mainBody"})
if _BODY is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise SystemExit('No <div id="mainBody"> found at ' + _URL)

# NOTE: find_all() searches all descendants, so a <span> nested inside a <p>
# is listed both on its own and as part of the <p>'s text.
_Tags = _BODY.find_all(["h2", "p", "span"])

# Positions in _Tags where a section begins. enumerate() supplies the real
# position of each tag; list.index() would compare tags by value (bs4 treats
# tags with the same name, attributes and contents as equal) and therefore
# return the first look-alike instead of the current element.
_Splitter = [
    position
    for position, tag in enumerate(_Tags)
    if tag.get_attribute_list("class") == ["subheading"]
]

# Pair each section start with the next start; the final section is paired
# with None so that its slice runs to the end of _Tags.
_Result = [
    _Tags[start:end]
    for start, end in zip(_Splitter, _Splitter[1:] + [None])
]

print(
    "\n\n===================================\n\n".join(
        "\n".join(tag.text for tag in section) for section in _Result
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment