Skip to content

Instantly share code, notes, and snippets.

@JosephShering
Created September 8, 2014 21:21
Show Gist options
  • Save JosephShering/94f3a022df46ecd7cd5b to your computer and use it in GitHub Desktop.
Save JosephShering/94f3a022df46ecd7cd5b to your computer and use it in GitHub Desktop.
Esquire Scraper
import requests, json
from bs4 import BeautifulSoup
# from pymongo import MongoClient
from pprint import PrettyPrinter
# Shared pretty-printer; scrape() uses it to echo each drink as it is collected.
pprint = PrettyPrinter(indent=3)

# MongoDB persistence is disabled: the pymongo import and the client are
# commented out, so the database/collection handles that depend on `client`
# must stay disabled as well -- leaving them active raised NameError as soon
# as the module was loaded. Re-enable all of these lines together.
# client = MongoClient('localhost', 27017)
# bartender = client.bartender
# drinks = bartender.drinks
# Seconds to wait for the server before giving up on a single HTTP request.
_REQUEST_TIMEOUT = 30


def _thumbnail_src(result):
    """Return the ``src`` of the first ``<img>`` inside a direct ``<a>`` child.

    This matches what the relative CSS selector ``> a > img`` expresses, but
    is written with direct-child ``find`` calls because a selector that starts
    with a combinator is not accepted by the Soup Sieve engine used by
    Beautiful Soup 4.7+. Returns ``None`` when no such image exists.
    """
    for anchor in result.find_all('a', recursive=False):
        image = anchor.find('img', recursive=False)
        if image is not None and image.get('src'):
            return image['src']
    return None


def _joined_paragraphs(soup, selector):
    """Return the text of every node matching *selector*, each preceded by a newline."""
    return ''.join('\n' + node.text for node in soup.select(selector))


def scrape():
    """Scrape the Esquire drinks list and write the result to ``drinks.json``.

    Downloads the full drinks listing, then for every entry (elements with
    class ``result`` or ``result_alt``) follows the title link to the drink's
    own page to collect its instructions and description. Each drink is
    pretty-printed as it is collected, and the complete list is dumped as
    JSON to ``drinks.json`` in the current working directory.

    Each drink is a dict with the keys ``name``, ``ingredients`` (list of
    strings), ``instructions``, ``description`` and ``image_url``. The
    ``instructions`` and ``description`` strings are the page's paragraphs,
    each prefixed with a newline. ``image_url`` is ``''`` when the entry has
    no thumbnail.

    Raises:
        requests.HTTPError: if the listing page returns an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    host = 'http://www.esquire.com'
    # Named `scraped` rather than `drinks` so it cannot be confused with a
    # module-level `drinks` collection handle.
    scraped = []

    # One session for the whole run so the connection to the host is reused
    # across the per-drink requests.
    with requests.Session() as session:
        listing = session.get(
            "http://www.esquire.com/drinks/drinks-full-list/",
            timeout=_REQUEST_TIMEOUT,
        )
        # Fail loudly instead of silently writing an empty file when the
        # listing itself could not be fetched.
        listing.raise_for_status()
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(listing.text, 'html.parser')
        entries = soup.find_all(class_='result') + soup.find_all(class_='result_alt')

        for res in entries:
            result_content = res.find(class_='result_content')
            title_link = None
            if result_content is not None and result_content.h2 is not None:
                title_link = result_content.h2.a
            # An entry without a title link has neither a name nor a detail
            # page to visit, so there is nothing useful to record for it.
            if title_link is None or not title_link.get('href'):
                continue

            image_src = _thumbnail_src(res)

            # The detail page is parsed as-is (no status check): a missing
            # page simply yields empty instructions and description.
            detail_page = session.get(
                host + title_link['href'],
                timeout=_REQUEST_TIMEOUT,
            )
            detail_soup = BeautifulSoup(detail_page.text, 'html.parser')

            drink = {
                'name': title_link.string,
                'ingredients': [
                    item.text for item in result_content.select('ul > li')
                ],
                'instructions': _joined_paragraphs(
                    detail_soup, '#instruction_container p'
                ),
                'description': _joined_paragraphs(
                    detail_soup, '#drink_commentary > p'
                ),
                # Keep the thumbnail URL for later linking.
                'image_url': host + image_src if image_src else '',
            }

            pprint.pprint(drink)
            scraped.append(drink)

    # Put all those drinks into a file.
    with open('drinks.json', 'w') as f:
        json.dump(scraped, f)
# Run the scraper only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    scrape()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment