Skip to content

Instantly share code, notes, and snippets.

@basilesimon
Created January 13, 2015 21:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save basilesimon/c3ca04dd6971c7b70a57 to your computer and use it in GitHub Desktop.
Save basilesimon/c3ca04dd6971c7b70a57 to your computer and use it in GitHub Desktop.
THE SOLUTION
from bs4 import BeautifulSoup
from urllib2 import urlopen
BASE_URL = "http://www.chicagoreader.com"
def make_soup(url):
    """Download *url* and parse the response body into a soup tree.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the downloaded markup using
        the ``lxml`` parser.

    Any error raised by ``urlopen`` or by the parser propagates to the
    caller unchanged.
    """
    from contextlib import closing

    # closing() releases the HTTP response even if read() raises. It is used
    # instead of a bare ``with urlopen(...)`` because Python 2's urllib2
    # response objects do not implement the context-manager protocol.
    with closing(urlopen(url)) as response:
        html = response.read()
    return BeautifulSoup(html, "lxml")
def get_category_links(section_url):
    """Collect the category page URLs listed on a section page.

    The page is searched for a ``<dl>`` element with CSS class ``boccat``;
    every ``<dd>`` inside it is expected to wrap an ``<a>`` whose ``href``
    points at one category page.

    Args:
        section_url: URL of the section page to scan.

    Returns:
        A list of strings, each formed by prefixing ``BASE_URL`` to an
        anchor's ``href``. The list is empty when the page contains no
        ``boccat`` list.
    """
    soup = make_soup(section_url)
    boccat = soup.find("dl", "boccat")
    if boccat is None:
        # The expected list is missing (layout change, error page, ...):
        # report "no categories" instead of failing with AttributeError.
        return []

    category_links = []
    for dd in boccat.find_all("dd"):
        anchor = dd.a
        # Skip entries that carry no usable link rather than crashing on
        # a missing <a> tag or a missing/empty href attribute.
        if anchor is None or not anchor.get("href"):
            continue
        category_links.append(BASE_URL + anchor["href"])
    return category_links
if __name__ == '__main__':
    # Section page to scrape (per its URL: Best of Chicago 2011, food & drink).
    food_n_drink = ("http://www.chicagoreader.com/chicago/"
                    "best-of-chicago-2011-food-drink/BestOf?oid=4106228")
    # Fetch the page once and print that result; re-calling
    # get_category_links() with a copy of the same URL would download and
    # parse the page a second time for no benefit.
    categories = get_category_links(food_n_drink)
    # Parenthesised single-argument form prints identically on Python 2
    # and is also valid Python 3 syntax.
    print(categories)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment