Skip to content

Instantly share code, notes, and snippets.

@neil90
Created May 11, 2015 21:59
Show Gist options
  • Save neil90/982a38daed295f5f7f73 to your computer and use it in GitHub Desktop.
Save neil90/982a38daed295f5f7f73 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
def make_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Fetch the URL that was passed in rather than a module-level global,
    # so the function works for any page and when imported from elsewhere.
    response = requests.get(url)
    # Fail loudly instead of parsing an error page as if it were content.
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    return BeautifulSoup(response.text, "html.parser")
def initial_links(website, first_year=2009, last_year=2015):
    """Collect the targets of anchors on *website* that are labelled with a year.

    An anchor is selected when its text is exactly the decimal string of a
    year in the inclusive range ``first_year``..``last_year``.

    Args:
        website: URL of the index page to scan.
        first_year: First year label to accept (inclusive).
        last_year: Last year label to accept (inclusive).

    Returns:
        A list of ``href`` values, in document order.
    """
    # Build the accepted labels once, as a set, instead of once per anchor.
    wanted_years = {str(year) for year in range(first_year, last_year + 1)}
    soup = make_soup(website)
    # href=True skips anchors without an href attribute, which would
    # otherwise raise KeyError on link["href"].
    return [
        link["href"]
        for link in soup.find_all("a", href=True)
        if link.get_text() in wanted_years
    ]
if __name__ == '__main__':
    website = 'http://www.presidency.ucsb.edu/sou.php'
    links = initial_links(website)  # all links labelled with a year
    if not links:
        # Without this guard, links[0] below would raise a bare IndexError.
        raise SystemExit('No year links found at ' + website)
    # Work in progress: only the first link is fetched, to figure out how to
    # extract the text from a linked page.
    first_page = make_soup(links[0])
    text = first_page.find('span', {'class': 'displaytext'})
    # Function-call form prints the same thing on Python 2 and Python 3.
    print(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment