Skip to content

Instantly share code, notes, and snippets.

@samrat
Created April 21, 2011 16:59
Show Gist options
  • Save samrat/934987 to your computer and use it in GitHub Desktop.
Save samrat/934987 to your computer and use it in GitHub Desktop.
Grabs headlines and their links from Hacker News
import urllib2
from BeautifulSoup import BeautifulSoup
# Root URL of Hacker News (news.ycombinator.com); get_page() opens this address.
HN_url = "http://news.ycombinator.com"
def get_page():
    """Open ``HN_url`` and hand back the response object.

    The handle returned by ``urllib2.urlopen`` is passed to the caller
    as-is: nothing is read from it and it is not closed here.
    """
    return urllib2.urlopen(HN_url)
def get_stories(content):
    """Collect every link found inside the page's title cells.

    ``content`` is handed straight to ``BeautifulSoup`` for parsing (the
    script passes it the object returned by ``get_page()``). Each
    ``<td class="title">`` cell is searched for ``<a>`` tags, and all of
    those tags are returned together as one flat list.
    """
    parsed = BeautifulSoup(content)
    title_cells = parsed.findAll("td", {"class": "title"})
    return [anchor for cell in title_cells for anchor in cell.findAll("a")]
# Print one "* <title> <url>" line for every link returned by get_stories().
for title_a in get_stories(get_page()):
    title = title_a.string
    if title is None:
        # BeautifulSoup leaves .string as None when the anchor holds more
        # than a single text node (e.g. nested tags), so calling .strip()
        # on it would raise AttributeError; join the text pieces instead.
        title = "".join(title_a.findAll(text=True))
    title = title.strip()
    url = title_a['href']
    # One pre-formatted string yields the same "* title url" text and parses
    # both as a print statement and as a print() call.
    print("* %s %s" % (title, url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment