Skip to content

Instantly share code, notes, and snippets.

@mrecachinas
Created May 7, 2017 16:08
Show Gist options
  • Save mrecachinas/a45cc590dce1e3a7e08978aac06f832e to your computer and use it in GitHub Desktop.
Save mrecachinas/a45cc590dce1e3a7e08978aac06f832e to your computer and use it in GitHub Desktop.
The drinking game involving Wikipedia
#!/usr/bin/env python
from __future__ import print_function
import urllib2
import httplib
from bs4 import BeautifulSoup, SoupStrainer
import string
import time
def get_wiki(link):
    """Open ``link`` and return the response, or ``None`` if the fetch fails.

    This is a best-effort fetch: errors are never propagated to the caller,
    so the result must be checked for ``None`` before it is used.

    Args:
        link: URL to open.

    Returns:
        The file-like response object from ``urllib2.urlopen``, or ``None``
        when the request raised an exception.
    """
    try:
        return urllib2.urlopen(link)
    except (urllib2.URLError, httplib.HTTPException):
        # Expected network/HTTP failures are ignored on purpose.
        # urllib2.HTTPError is a subclass of URLError, so it is covered here;
        # a separate handler placed after URLError could never run.
        return None
    except Exception:
        # Anything else is unexpected: show the traceback, but still honour
        # the "return None on failure" contract instead of raising.
        import traceback
        print(traceback.format_exc())
        return None
def get_random_wiki():
    """Fetch a random English Wikipedia article via ``Special:Random``.

    Returns:
        Whatever ``get_wiki`` returns for the random-article URL.
    """
    return get_wiki("https://en.wikipedia.org/wiki/Special:Random")
def first_lowercase_link(wiki):
    """Return the first paragraph link whose text starts with a lowercase letter.

    Only ``<a href=...>`` tags inside ``<p>`` elements of the element with
    id ``mw-content-text`` are considered, in document order.

    Args:
        wiki: HTML markup (a string) or a readable file-like object holding
            the article. ``None`` is accepted and yields ``None``.

    Returns:
        A ``(url, text)`` tuple, where ``url`` is the link's ``href`` joined
        onto ``https://en.wikipedia.org``, or ``None`` when the page has no
        content element or no qualifying link.
    """
    if wiki is None:
        # A failed fetch is passed through as "no link" instead of crashing.
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    content = BeautifulSoup(wiki, "html.parser").find(id='mw-content-text')
    if content is None:
        return None

    prefix = "https://en.wikipedia.org"
    for para in content.find_all('p'):
        for link in para.find_all('a', href=True):
            text = link.text
            # The emptiness check must come first: the empty string is
            # "in" every string, so '' would otherwise pass the test.
            if text and text[0] in string.ascii_lowercase:
                return (urllib2.urlparse.urljoin(prefix, link["href"]), text)
    return None
def links_to_topic(wiki, topic=""):
    """Follow first-lowercase links starting at ``wiki`` until ``topic``.

    Args:
        wiki: The starting article, in any form ``first_lowercase_link``
            accepts.
        topic: Link text to stop at. Compared case-insensitively.

    Returns:
        A list of ``(url, text)`` tuples in the order they were followed.
        The matching link is the last element when the topic is reached.
        The list ends early when a page has no qualifying link, when a page
        cannot be fetched, or when the chain loops back to a URL that was
        already followed.
    """
    target = topic.lower()
    links = []
    seen_urls = set()

    link = first_lowercase_link(wiki)
    while link and link[1].lower() != target:
        url = link[0]
        if url in seen_urls:
            # The chain has entered a cycle and would never terminate.
            return links
        seen_urls.add(url)
        links.append(link)

        next_wiki = get_wiki(url)
        if next_wiki is None:
            # Fetch failed; stop here with what has been collected.
            return links
        try:
            link = first_lowercase_link(next_wiki)
        finally:
            # Release the connection once the page has been parsed.
            next_wiki.close()

    if link:
        # The loop ended on the link that matches the topic; record it too.
        links.append(link)
    return links
def test():
    """Crawl from several random articles and print the chains found.

    For each of ``MAX_ITER`` random articles, the article title is mapped to
    the list returned by ``links_to_topic(..., topic="philosophy")``. The
    resulting dict is printed once all iterations are done. Articles that
    could not be fetched are skipped.
    """
    MAX_ITER = 10
    links = {}
    for _ in range(MAX_ITER):
        random_wiki = get_random_wiki()
        if random_wiki is None:
            # Fetch failed; nothing to parse for this iteration.
            continue

        # Read the body exactly once. A response can only be consumed a
        # single time, so the same markup string is reused for both the
        # title lookup and the link crawl.
        try:
            html = random_wiki.read()
        finally:
            random_wiki.close()

        title = BeautifulSoup(html, "html.parser").find(id="firstHeading").text
        links[title] = links_to_topic(html, topic="philosophy")
    print(links)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment