# bdewilde/friedman_article_basic_scrape.py

## friedman_article_basic_scrape.py
import bs4
import requests

# Download one Thomas Friedman op-ed from the NYT server and collect the text
# of its body paragraphs into the module-level string `article`.

# GET html from NYT server. The timeout keeps a stalled connection from
# hanging the script forever, and raise_for_status() stops us from scraping
# an HTTP error page as if it were the article.
response = requests.get(
    'http://www.nytimes.com/2013/04/07/opinion/sunday/friedman-weve-wasted-our-timeout.html',
    timeout=30,
)
response.raise_for_status()

# Parse with an explicitly named parser: leaving it out makes BeautifulSoup
# guess one (and warn), so the parse tree can differ between machines.
soup = bs4.BeautifulSoup(response.text, 'html.parser')

# select all tags containing article text, then extract the text from each
paragraphs = soup.find_all('p', itemprop='articleBody')

# Concatenate the paragraph texts with no separator, exactly as a `+=` loop
# would; an empty result means no matching <p> tags were found.
article = ''.join(paragraph.get_text() for paragraph in paragraphs)