Skip to content

Instantly share code, notes, and snippets.

@michelleminkoff
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save michelleminkoff/377900bf7288a8871c34 to your computer and use it in GitHub Desktop.
Save michelleminkoff/377900bf7288a8871c34 to your computer and use it in GitHub Desktop.
recalls
import csv
import requests
from BeautifulSoup import BeautifulSoup
def scrapePage(url):
    """Scrape one recalls listing page and append its rows to the CSV.

    Downloads ``url``, finds the ``<table>`` whose ``cellpadding`` is 3 and,
    for every ``<tr>`` after the first one (presumably the header row),
    collects the cleaned text of each ``<td>``.  Each row is written through
    the module-level ``writer`` (a ``csv.writer`` that must already exist
    when this function is called), and the rows gathered from the page are
    echoed to stdout once.

    Args:
        url: Address of the page to scrape.

    Returns:
        None.  If the page contains no matching table, a message is printed
        and nothing is written for that page.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status.
    """
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content)
    table = soup.find('table', attrs={'cellpadding': 3})
    if table is None:
        # find() returns None when nothing matches; report it and move on
        # rather than dying with an AttributeError on ``None.findAll``.
        print("No recalls table found at {0}; skipping".format(url))
        return

    list_of_rows = []
    for row in table.findAll('tr')[1:]:
        # NOTE(review): the flattened source shows the replaced literal as a
        # single blank character, which looks like an ``&nbsp;`` entity lost
        # when the snippet was rendered -- confirm.  Removing ordinary spaces
        # would glue words together, and a literal U+00A0 is already dropped
        # by the ASCII/ignore encode, so the entity text is what is stripped.
        list_of_cells = [
            cell.text.replace('&nbsp;', '').encode('ascii', 'ignore')
            for cell in row.findAll('td')
        ]
        list_of_rows.append(list_of_cells)
        writer.writerow(list_of_cells)

    # Echo the page's rows once, after the loop, so the growing list is not
    # re-printed for every single row.
    print(list_of_rows)
# Script body: create the CSV, write the header row, then scrape listing
# pages 1 through 8.
#
# ``writer`` is deliberately bound at module level: scrapePage() looks it up
# as a global when it writes rows.  The ``with`` block guarantees the file is
# flushed and closed even if one of the page requests raises.
# "wb" is the mode the Python 2 csv module expects for output files.
with open("./recalls2.csv", "wb") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["Date", "Brand Name", "Product Description", "Reason", "Company"])
    for number in range(1, 9):
        scrapePage('http://www.fda.gov/Safety/Recalls/ArchiveRecalls/2015/default.htm?Page={0}'.format(number))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment