Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
recalls
import csv
import requests
from BeautifulSoup import BeautifulSoup
def scrapePage(url):
    """Scrape the FDA recall table at *url* and append its rows to the CSV.

    Fetches the page, locates the recalls table (the one with
    ``cellpadding=3``), skips the header row, and writes one CSV row per
    table row through the module-level ``writer`` (which must be
    initialised before this is called).

    NOTE(review): this targets BeautifulSoup 3 (``from BeautifulSoup
    import BeautifulSoup``) and Python 2 text handling
    (``.encode('ascii', 'ignore')`` on cell text).
    """
    response = requests.get(url)
    html = response.content
    soup = BeautifulSoup(html)
    table = soup.find('table', attrs={'cellpadding': 3})
    list_of_rows = []
    for row in table.findAll('tr')[1:]:  # [1:] skips the header <tr>
        list_of_cells = []
        for cell in row.findAll('td'):
            # NOTE(review): the ' ' being stripped looks like a pasted-in
            # non-breaking space (&nbsp;/\xa0) — confirm against the page.
            text = cell.text.replace(' ', '').encode('ascii', 'ignore')
            list_of_cells.append(text)
        list_of_rows.append(list_of_cells)
    print(list_of_rows)
    # BUG FIX: original did writer.writerows([list_of_cells]), which wrote
    # only the LAST row of the page; write every accumulated row instead.
    writer.writerows(list_of_rows)
# Open the output CSV and scrape pages 1-8 of the 2015 FDA recall archive.
# ``writer`` is intentionally module-level: scrapePage() writes through it.
outfile = open("./recalls2.csv", "wb")  # "wb": Python 2 csv convention
try:
    writer = csv.writer(outfile)
    writer.writerow(["Date", "Brand Name", "Product Description", "Reason", "Company"])
    for number in range(1, 9):  # pages 1..8 (list() wrapper was redundant)
        scrapePage('http://www.fda.gov/Safety/Recalls/ArchiveRecalls/2015/default.htm?Page={0}'.format(number))
finally:
    # BUG FIX: original never closed the file, risking unflushed rows.
    outfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment