Skip to content

Instantly share code, notes, and snippets.

@scuerda
Created June 29, 2015 00:46
Show Gist options
  • Save scuerda/1c88d7eb8848f9d01fd0 to your computer and use it in GitHub Desktop.
Save scuerda/1c88d7eb8848f9d01fd0 to your computer and use it in GitHub Desktop.
Ben and Jerry's Flavors and Release Date
from bs4 import BeautifulSoup
import urllib2
import csv
# Scrape the Ben & Jerry's flavor index, then visit each flavor's own page
# to read its release date. Results accumulate in flavorList as dicts with
# the keys 'flavor' and 'release date'. (Python 2 script: relies on urllib2.)

# Fetch and parse the index page, closing the HTTP response afterwards.
html = urllib2.urlopen("http://www.benjerry.com/flavors")
try:
    # Name the parser explicitly so the parse result does not depend on
    # which optional parser libraries happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
finally:
    html.close()

flavors = soup.select(".flavor-item")
# The first ".flavor-item" match is dropped (presumably it is not a real
# flavor entry -- confirm against the live page).
flavors = flavors[1:]

flavorList = []
for f in flavors:
    anchor = f.find("a")
    if anchor is None or not anchor.get("href"):
        # No link to follow, so there is no flavor page to read a date from.
        continue
    link = "http://www.benjerry.com" + anchor["href"]

    try:
        flavorName = f.select(".description")[0].select("h4")[0].contents[0]
    except IndexError:
        # The item lacks the expected description/h4 markup; skip it.
        continue
    flavorName = flavorName.encode('ascii', 'ignore')
    print(flavorName)

    try:
        flavorpage = urllib2.urlopen(link)
    except urllib2.URLError as err:
        # HTTPError is a URLError subclass. One unreachable flavor page
        # should not abort the whole scrape.
        print("Skipping %s: %s" % (link, err))
        continue
    try:
        flavorSoup = BeautifulSoup(flavorpage, "html.parser")
    finally:
        flavorpage.close()

    try:
        flavorDate = flavorSoup.select(".big-date")[0].contents[0]
    except IndexError:
        # The page shows no release date; leave this flavor out.
        continue
    # Encode the date the same way as the name: the result is a plain byte
    # string that no longer references the page's parse tree and that the
    # Python 2 csv writer can always emit.
    flavorDate = flavorDate.encode('ascii', 'ignore')

    newFlavor = dict()
    newFlavor['flavor'] = flavorName
    newFlavor['release date'] = flavorDate
    flavorList.append(newFlavor)
# Write the collected flavors to flavors.csv: a header row, then one row per
# entry in flavorList.
# The Python 2 csv module expects a file opened in binary mode; in text mode
# the writer's "\r\n" terminator becomes "\r\r\n" on Windows.
with open("flavors.csv", "wb") as csvfile:
    fieldnames = ['flavor', 'release date']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in flavorList:
        # Build a fresh dict so the entries in flavorList are not mutated.
        cleaned = {}
        for key in fieldnames:
            value = row[key]
            # Under Python 2, anything that is not already a byte string
            # (i.e. unicode text) is encoded to ASCII, dropping characters
            # that cannot be represented; the csv writer would otherwise
            # raise UnicodeEncodeError on non-ASCII text. This covers the
            # release date as well as the flavor name.
            if not isinstance(value, str):
                value = value.encode('ascii', 'ignore')
            cleaned[key] = value
        writer.writerow(cleaned)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment