Jonah Blumstein JBlumstein

## socrata_nyc_parking_tickets_get.py
#import libraries
import numpy as np
import pandas as pd
import datetime
import urllib

from bokeh.plotting import *
from bokeh.models import HoverTool
from collections import OrderedDict

## gist:2d8d69652ad34f9a9c6766ceb3fc4630
#script to add genre, playcount, listener count, album duration, and number of songs information from lastfm
#to all album reviews scraped from pitchfork before the site's redesign

import requests
import json
import pandas as pd
import time

#load in table of album reviews metadata (album name, artist, review date, and rating)
album_table = pd.read_csv("pitchforkalbumreviews.csv")

## gist:d475f62e686e0d55ab56dae1a0503211
import urllib.request as ur #url.request lib for handling the url
from bs4 import BeautifulSoup #bs for parsing the page
import requests
from lxml import html
import numpy as np
import pandas as pd

#get index pages for album reviews

#base url to add to

## Pitchfork Scraper
#This scraper finds album review titles, dates, and scores from Pitchfork. It scrapes the child pages of the object-grid css class on http://pitchfork.com/reviews/albums/1 (...n) and puts them into a list of lists, a list of dicts, and a Pandas dataframe.
#The second half of this script takes the list of lists produced by the first half, computes the average album review score for each month, and puts the result into a list of dicts and a Pandas dataframe.
#Note that this scraper takes a long time to run because it uses BeautifulSoup to scrape each parent page (the child pages are scraped using xpath). If you want a demo, lower the number in the while loop to something like 3 or 4.
#As of March 2015, there are 818 pages of album reviews.

import urllib.request as ur #url.request lib for handling the url
from bs4 import BeautifulSoup #bs for parsing the page
import requests
from lxml import html
import numpy

## Epicurious scraper
#Written for a homework assignment: ask the user what ingredients they want to use in a recipe, go to the epicurious results page, select the first result, and return the recipe name, description, ingredients, and instructions.

import urllib.request as ur #url.request lib for handling the url
from bs4 import BeautifulSoup #bs for parsing the page

def recipegetter(searchterms):
    """Assemble the Epicurious search-results URL for ``searchterms``.

    Commas are removed from the input and every space is turned into ``+``
    before the text is appended to the search endpoint.

    NOTE(review): the visible body stops once ``fullurl`` is built and
    returns nothing; the remainder of the function appears to be missing
    from this excerpt -- confirm against the full script.
    """
    baseurl = "http://www.epicurious.com/tools/searchresults?search="
    # e.g. "chicken, rice" -> "chicken+rice"
    urlslug = "+".join(searchterms.replace(",", "").split(" "))
    fullurl = baseurl + urlslug

## BuzzFeed Scraper
from lxml import html
import requests
import pandas

root_url = 'http://www.buzzfeed.com/'
page_ids = ['bensmith', 'wtf', 'omg', '1uapps', 'amazon', 'amazonfiretv', 'discovercard', 'verizonholiday', 'autotraderdotcom','amazonkindle', 'jackdaniels', 'ibm', 'microsoftbing', 'wakeupcall', 'nhtsagov', 'intelinnovationslab', 'volvo', 'yahoo', 'walgreens', 'directv', 'motorolamotox', 'android', 'fruitwater', 'directvcinema', 'curve', 'loft', 'ford', 'lol', 'cute', 'fail', 'quizzes', 'videos', 'entertainment', 'buzz', 'mbvd','hayesbrown','jimdalrympleii','alexcampbell','kirstenking','christianzamora','candacelowry','kristinchirico','polls','alannaokun','jessicaprobus','katenocera','evanmcsan','mathewzeitlin','mikegiglio','adolfoflores', 'mollyhensleyclancy','chrisgeidner','jasonwells','andrewkaczynski','emilyorley','claudiakoerner','ryanhatesthis','sapna','lindseyadler','tasneemnashrulla','dominicholden','rossalynwarren','maryanngeorgantopoulos','nicholasmedinamora','ashleyford','adamdavis','lyapalater','alexalvarez

## Reddit posts by domain
#note that this script takes a couple minutes to run

import pandas
import time
import json
import requests
import datetime
import sys

#Enter your reddit username and password here
	#import libraries
	import numpy as np
	import pandas as pd
	import datetime
	import urllib

	from bokeh.plotting import *
	from bokeh.models import HoverTool
	from collections import OrderedDict
	#script to add genre, playcount, listener count, album duration, and number of songs information from lastfm
	#to all album reviews scraped from pitchfork before the site's redesign

	import requests
	import json
	import pandas as pd
	import time

	#load in table of album reviews metadata (album name, artist, review date, and rating)
	album_table = pd.read_csv("pitchforkalbumreviews.csv")
	import urllib.request as ur #url.request lib for handling the url
	from bs4 import BeautifulSoup #bs for parsing the page
	import requests
	from lxml import html
	import numpy as np
	import pandas as pd

	#get index pages for album reviews

	#base url to add to
	#This scraper finds album review titles, dates, and scores from Pitchfork. It scrapes the child pages of the object-grid css class on http://pitchfork.com/reviews/albums/1 (...n) and puts them into a list of lists, a list of dicts, and a Pandas dataframe.
	#The second half of this script takes the list of lists produced by the first half, computes the average album review score for each month, and puts the result into a list of dicts and a Pandas dataframe.
	#Note that this scraper takes a long time to run because it uses BeautifulSoup to scrape each parent page (the child pages are scraped using xpath). If you want a demo, lower the number in the while loop to something like 3 or 4.
	#As of March 2015, there are 818 pages of album reviews.

	import urllib.request as ur #url.request lib for handling the url
	from bs4 import BeautifulSoup #bs for parsing the page
	import requests
	from lxml import html
	import numpy
	#Written for a homework assignment: ask the user what ingredients they want to use in a recipe, go to the epicurious results page, select the first result, and return the recipe name, description, ingredients, and instructions.

	import urllib.request as ur #url.request lib for handling the url
	from bs4 import BeautifulSoup #bs for parsing the page

	def recipegetter(searchterms):
		"""Assemble the Epicurious search-results URL for ``searchterms``.

		Commas are removed from the input and every space is turned into
		``+`` before the text is appended to the search endpoint.

		NOTE(review): the visible body stops once ``fullurl`` is built and
		returns nothing; the remainder of the function appears to be missing
		from this excerpt -- confirm against the full script.
		"""
		# The statements below were previously at the same indentation as the
		# ``def`` line, which left the function without a body.
		searchterms_nocommas = searchterms.replace(",","")
		urlslug = searchterms_nocommas.replace(" ","+")
		baseurl = "http://www.epicurious.com/tools/searchresults?search="
		fullurl = baseurl+urlslug
	from lxml import html
	import requests
	import pandas

	root_url = 'http://www.buzzfeed.com/'
	page_ids = ['bensmith', 'wtf', 'omg', '1uapps', 'amazon', 'amazonfiretv', 'discovercard', 'verizonholiday', 'autotraderdotcom','amazonkindle', 'jackdaniels', 'ibm', 'microsoftbing', 'wakeupcall', 'nhtsagov', 'intelinnovationslab', 'volvo', 'yahoo', 'walgreens', 'directv', 'motorolamotox', 'android', 'fruitwater', 'directvcinema', 'curve', 'loft', 'ford', 'lol', 'cute', 'fail', 'quizzes', 'videos', 'entertainment', 'buzz', 'mbvd','hayesbrown','jimdalrympleii','alexcampbell','kirstenking','christianzamora','candacelowry','kristinchirico','polls','alannaokun','jessicaprobus','katenocera','evanmcsan','mathewzeitlin','mikegiglio','adolfoflores', 'mollyhensleyclancy','chrisgeidner','jasonwells','andrewkaczynski','emilyorley','claudiakoerner','ryanhatesthis','sapna','lindseyadler','tasneemnashrulla','dominicholden','rossalynwarren','maryanngeorgantopoulos','nicholasmedinamora','ashleyford','adamdavis','lyapalater','alexalvarez
	#note that this script takes a couple minutes to run

	import pandas
	import time
	import json
	import requests
	import datetime
	import sys

	#Enter your reddit username and password here