@melodykramer
Created December 22, 2017 20:57
## libraries
import urllib2
import csv
import sys
from bs4 import BeautifulSoup

## Python 2 workaround so csv can write non-ASCII text
reload(sys)
sys.setdefaultencoding('utf8')

## the url we want to scrape
nieman_page = 'http://www.niemanlab.org/2017/12/the-rise-of-skeptical-reading/'

## querying the page and returning the html to the variable page
page = urllib2.urlopen(nieman_page).read()

## parsing html using BeautifulSoup and storing it in the variable soup
soup = BeautifulSoup(page, 'html.parser')

## opens the output csv and writes a header row
output = csv.writer(open("Predictions.csv", "w"))
output.writerow(["Headline", "Blurb", "Byline", "Text"])

## collects the text of each element, one list per column
headlines = [tag.text for tag in soup.findAll(attrs={'class': 'simple-headline'})]
blurbs = [tag.text for tag in soup.findAll(attrs={'class': 'simple-post-deck'})]
authors = [tag.text for tag in soup.findAll(attrs={'class': 'predix-byline'})]
predictions = [tag.text for tag in soup.findAll(attrs={'class': 'predix-storybody'})]

## writes one row per prediction (the text + author bio is the storybody)
for headline, blurb, author, prediction in zip(headlines, blurbs, authors, predictions):
    output.writerow([headline, blurb, author, prediction])
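The core CSV step is zipping the four parallel lists (one per CSS class on the Nieman page) into one row per prediction. A minimal, stdlib-only sketch of that step, with hypothetical sample data standing in for the scraped text and an in-memory buffer standing in for Predictions.csv:

```python
import csv
import io

## Hypothetical sample data: in the real script these four lists hold
## the .text of every matching element found by BeautifulSoup.
headlines = ["Headline A", "Headline B"]
blurbs = ["Blurb A", "Blurb B"]
authors = ["Author A", "Author B"]
predictions = ["Text A", "Text B"]

## In-memory stand-in for open("Predictions.csv", "w")
buf = io.StringIO()
writer = csv.writer(buf)
writer.writerow(["Headline", "Blurb", "Byline", "Text"])

## zip pairs up the i-th headline, blurb, byline, and body,
## so each prediction becomes one CSV row
for row in zip(headlines, blurbs, authors, predictions):
    writer.writerow(row)

rows = buf.getvalue().splitlines()
```

If the lists are uneven lengths (e.g. a prediction with no byline on the live page), zip silently truncates to the shortest list, so checking the four list lengths before writing is a cheap sanity test.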