Skip to content

Instantly share code, notes, and snippets.

@nakami
Last active March 13, 2017 23:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nakami/81abb34c4d0e7fe752128fccc6a5b2e9 to your computer and use it in GitHub Desktop.
Save nakami/81abb34c4d0e7fe752128fccc6a5b2e9 to your computer and use it in GitHub Desktop.
beautifulsoup_gw2wiki.py
import requests
from bs4 import BeautifulSoup
# Fetch the Guild Wars 2 wiki page for "Eternity" and dump the text of the
# last id-bearing top-level element that appears before the table of
# contents (the element whose id is 'toc') to debug.txt and to stdout.
url = "https://wiki.guildwars2.com/wiki/Eternity"

# Send the request through a session so the retrying adapter mounted for
# this URL prefix is actually used (a bare requests.get() would bypass it).
s = requests.Session()
s.mount(url, requests.adapters.HTTPAdapter(max_retries=5))

try:
    # A timeout keeps the script from hanging forever on a stalled server.
    response = s.get(url, verify=True, timeout=30)
except requests.exceptions.RequestException as e:
    # Best effort: report the failure and continue with an empty document.
    print(e)
    html = ""
else:
    html = response.text

soup = BeautifulSoup(html, 'html.parser')
soup_buf = "foo"  # non-empty placeholder, kept when nothing matches

content = soup.find(id="mw-content-text")
if content is not None:
    # find_all(True, recursive=False) yields only the direct child *tags*;
    # bare text nodes between them have no attributes and are skipped.
    for p in content.find_all(True, recursive=False):
        element_id = p.get('id')
        if element_id is None:
            continue  # only elements carrying an id are of interest
        if element_id == 'toc':
            break  # stop once the table of contents is reached
        soup_buf = p.text

# Explicit encoding: wiki text may contain characters outside the
# platform's default code page.
with open("debug.txt", "w", encoding="utf-8") as fh:
    fh.write(soup_buf)
print(soup_buf)  # TODO: output is not yet the desired section of the page
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment