#import libraries
import numpy as np
import pandas as pd
import datetime
import urllib
from bokeh.plotting import *
from bokeh.models import HoverTool
from collections import OrderedDict
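#A rough sketch of how these imports might be used: an interactive scatter of review
#scores with hover tooltips. The dataframe is made-up demo data and its column names
#are assumptions, not the original script's input.
from bokeh.models import ColumnDataSource

demo = pd.DataFrame({
    "album": ["Album A", "Album B", "Album C"],
    "year": [2013, 2014, 2015],
    "score": [8.2, 6.5, 9.1],
})
source = ColumnDataSource(demo)

p = figure(title="Album review scores")
p.circle(x="year", y="score", size=10, source=source)

#show the album name and score when hovering over a point
p.add_tools(HoverTool(tooltips=[("album", "@album"), ("score", "@score")]))

output_file("scores.html")
show(p)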
#script to add genre, playcount, listener count, album duration, and number of songs information from lastfm
#to all album reviews scraped from pitchfork before the site's redesign
import requests
import json
import pandas as pd
import time
#load in table of album reviews metadata (album name, artist, review date, and rating)
album_table = pd.read_csv("pitchforkalbumreviews.csv")
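#A rough sketch of the enrichment step, assuming Last.fm's album.getinfo method and
#assuming album_table has "artist" and "album" columns; the API key is a placeholder
#and the exact response fields should be checked against the real API.
API_KEY = "YOUR_LASTFM_API_KEY"
API_URL = "http://ws.audioscrobbler.com/2.0/"

def get_album_info(artist, album):
    #query album.getinfo for one album and pull out the fields we want
    params = {"method": "album.getinfo", "api_key": API_KEY, "artist": artist,
              "album": album, "format": "json"}
    data = requests.get(API_URL, params=params).json().get("album", {})
    tracks = data.get("tracks", {}).get("track", [])
    tags = data.get("tags", {}).get("tag", [])
    return {"playcount": data.get("playcount"),
            "listeners": data.get("listeners"),
            "genre": tags[0]["name"] if tags else None,
            "num_songs": len(tracks),
            "duration": sum(int(t.get("duration") or 0) for t in tracks)}

#enrich each review row, sleeping between requests to stay polite to the API
lastfm_rows = []
for _, row in album_table.iterrows():
    lastfm_rows.append(get_album_info(row["artist"], row["album"]))
    time.sleep(0.25)
album_table = album_table.join(pd.DataFrame(lastfm_rows))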
import urllib.request as ur #urllib.request for fetching the pages
from bs4 import BeautifulSoup #BeautifulSoup for parsing the pages
import requests
from lxml import html
import numpy as np
import pandas as pd
#get index pages for album reviews
#base url to add to
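#A rough sketch of collecting the index pages with the imports above, assuming the
#pre-redesign URL scheme http://pitchfork.com/reviews/albums/<page>; keep num_pages
#small for a demo run.
base_url = "http://pitchfork.com/reviews/albums/"
num_pages = 4
index_pages = []
for page in range(1, num_pages + 1):
    response = requests.get(base_url + str(page))
    index_pages.append(html.fromstring(response.content))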
#This scraper finds album review titles, dates, and scores from Pitchfork. It scrapes the child pages of the object-grid css class on http://pitchfork.com/reviews/albums/1 (...n) and puts them into a list of lists, a list of dicts, and a Pandas dataframe.
#The second half of this script takes the list of lists produced by the first half, finds the average album review score for each month, and puts the results into a list of dicts and a Pandas dataframe.
#Note that this scraper takes a long time to run because it uses BeautifulSoup to scrape each parent page (the child pages are scraped using xpath). If you want a demo, lower the number in the while loop to something like 3 or 4.
#As of March 2015, there are 818 pages of album reviews.
import urllib.request as ur #urllib.request for fetching the pages
from bs4 import BeautifulSoup #BeautifulSoup for parsing the pages
import requests
from lxml import html
import numpy
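#A rough sketch of the second half described above: averaging review scores by month
#with pandas. The list-of-lists shape [title, date, score] is an assumption made for
#the demo data below.
import pandas as pd

reviews = [["Album A", "March 2 2015", 8.2],
           ["Album B", "March 9 2015", 6.5],
           ["Album C", "February 20 2015", 7.4]]
df = pd.DataFrame(reviews, columns=["title", "date", "score"])
df["date"] = pd.to_datetime(df["date"])

#average album review score for each month, as a dataframe and a list of dicts
monthly = df.groupby(df["date"].dt.to_period("M"))["score"].mean().reset_index(name="avg_score")
monthly_dicts = monthly.to_dict("records")
print(monthly)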
#Homework assignment: ask the user what ingredients they want to use in a recipe, go to the Epicurious results page, select the first result, and return the recipe name, description, ingredients, and instructions.
import urllib.request as ur #urllib.request for fetching the pages
from bs4 import BeautifulSoup #BeautifulSoup for parsing the pages
def recipegetter(searchterms):
    searchterms_nocommas = searchterms.replace(",","")
    urlslug = searchterms_nocommas.replace(" ","+")
    baseurl = "http://www.epicurious.com/tools/searchresults?search="
    fullurl = baseurl+urlslug
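    #A rough sketch of how the rest of the function might go; the tag names and CSS
    #classes below are placeholders, not Epicurious's actual markup, so they would
    #need to be checked against the live pages.
    results_page = BeautifulSoup(ur.urlopen(fullurl).read(), "html.parser")
    first_link = results_page.find("a", class_="recipe-link")  #hypothetical class
    if first_link is None:
        return None
    recipe_url = "http://www.epicurious.com" + first_link["href"]
    recipe_page = BeautifulSoup(ur.urlopen(recipe_url).read(), "html.parser")
    name = recipe_page.find("h1").get_text(strip=True)
    description = recipe_page.find("p", class_="dek").get_text(strip=True)  #hypothetical class
    ingredients = [li.get_text(strip=True) for li in recipe_page.find_all("li", class_="ingredient")]  #hypothetical class
    instructions = [step.get_text(strip=True) for step in recipe_page.find_all("li", class_="preparation-step")]  #hypothetical class
    return name, description, ingredients, instructions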
from lxml import html
import requests
import pandas
root_url = 'http://www.buzzfeed.com/'
page_ids = ['bensmith', 'wtf', 'omg', '1uapps', 'amazon', 'amazonfiretv', 'discovercard', 'verizonholiday', 'autotraderdotcom', 'amazonkindle', 'jackdaniels', 'ibm', 'microsoftbing', 'wakeupcall', 'nhtsagov', 'intelinnovationslab', 'volvo', 'yahoo', 'walgreens', 'directv', 'motorolamotox', 'android', 'fruitwater', 'directvcinema', 'curve', 'loft', 'ford', 'lol', 'cute', 'fail', 'quizzes', 'videos', 'entertainment', 'buzz', 'mbvd', 'hayesbrown', 'jimdalrympleii', 'alexcampbell', 'kirstenking', 'christianzamora', 'candacelowry', 'kristinchirico', 'polls', 'alannaokun', 'jessicaprobus', 'katenocera', 'evanmcsan', 'mathewzeitlin', 'mikegiglio', 'adolfoflores', 'mollyhensleyclancy', 'chrisgeidner', 'jasonwells', 'andrewkaczynski', 'emilyorley', 'claudiakoerner', 'ryanhatesthis', 'sapna', 'lindseyadler', 'tasneemnashrulla', 'dominicholden', 'rossalynwarren', 'maryanngeorgantopoulos', 'nicholasmedinamora', 'ashleyford', 'adamdavis', 'lyapalater', 'alexalvarez']
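#A rough sketch of walking the feed pages with the imports above; the XPath expression
#is a placeholder for wherever post titles actually live in BuzzFeed's markup.
rows = []
for page_id in page_ids:
    response = requests.get(root_url + page_id)
    tree = html.fromstring(response.content)
    for title in tree.xpath('//a[@class="post-title"]/text()'):  #hypothetical class
        rows.append({"feed": page_id, "title": title.strip()})
posts = pandas.DataFrame(rows)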
#note that this script takes a couple minutes to run
import pandas
import time
import json
import requests
import datetime
import sys
#Enter your reddit username and password here
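#A rough sketch of one way the script might pull data: paging through a subreddit's
#public .json listing with requests. The subreddit, User-Agent string, and credential
#placeholders below are assumptions, and the original login step is not reproduced here.
USERNAME = "your_username"  #placeholder
PASSWORD = "your_password"  #placeholder
USER_AGENT = "album-review-analysis script by /u/" + USERNAME

def get_listing(subreddit, after=None):
    #fetch one page (up to 100 posts) of a subreddit's newest posts as JSON
    url = "https://www.reddit.com/r/{}/new.json".format(subreddit)
    params = {"limit": 100}
    if after:
        params["after"] = after
    return requests.get(url, params=params, headers={"User-Agent": USER_AGENT}).json()["data"]

posts = []
after = None
for _ in range(3):  #small demo; raise the range for a real pull
    data = get_listing("Music", after=after)
    for child in data["children"]:
        post = child["data"]
        posts.append({"title": post["title"],
                      "score": post["score"],
                      "created": datetime.datetime.fromtimestamp(post["created_utc"])})
    after = data.get("after")
    if after is None:
        break
    time.sleep(2)  #stay well under Reddit's request rate limits
posts_df = pandas.DataFrame(posts)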