Create a gist now

Instantly share code, notes, and snippets.

A simple example of a ScraperWiki Python scraper for a recent Hacks/Hackers L.A. meetup...
#!/usr/bin/env python
import logging
import re
import time, datetime
import mechanize
from BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
# log everything and send to stderr
# (basicConfig installs a stderr StreamHandler by default; DEBUG level is
# required because the functions below only emit logging.debug messages)
logging.basicConfig(level=logging.DEBUG)
# new instance of mechanize browser, shared by the scraping functions below
mech = mechanize.Browser()
# gather each url from target page and write to list
def gather_main_content(url_to_scrape):
    """Scrape the incident tables on a page and save one record per table.

    The page at ``url_to_scrape`` is fetched with the module-level ``mech``
    browser and parsed with BeautifulSoup. Every ``<table>`` with class
    ``incident_table`` except the first one found is processed: its first
    ``<tr>`` is skipped, the fire name is read from the next row and the
    county from the row after that. Each table is saved as a record with the
    keys ``id``, ``name`` and ``county``, using ``id`` as the unique key.

    NOTE(review): the fetch/read lines and the save call were truncated in
    the source this was recovered from. ``mech.open(...).read()`` and
    ``scraperwiki.sqlite.save(unique_keys, data)`` are reconstructions based
    on the surrounding code -- confirm against the original scraper.

    Raises IndexError when a table has fewer than two rows after the first.
    """
    # Function-scope import: the module's visible import block does not list
    # scraperwiki, and the save call below depends on it.
    import scraperwiki

    logging.debug('running gather_main_content function')
    page_scrape = mech.open(url_to_scrape)
    html_scrape = page_scrape.read()
    soup_scrape = BeautifulSoup(html_scrape, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # [1:] drops the first matching table; only the remaining ones are read.
    data_table = soup_scrape.findAll('table', {'class': 'incident_table'})[1:]
    for table in data_table:
        # [1:] drops the table's first row before picking out name and county.
        data_rows = table.findAll('tr')[1:]
        fire_name = extract_data_from_cells(data_rows[0])
        county = extract_data_from_cells(data_rows[1])
        unique_id = fire_name + '-hacks-hackers'
        logging.debug('%s - %s', fire_name, county)

        # Saving data
        unique_keys = ['id']
        fire_narrative = {
            'id': unique_id,
            'name': fire_name,
            'county': county,
        }
        scraperwiki.sqlite.save(unique_keys, fire_narrative)
def extract_data_from_cells(row_name):
    """Return the UTF-8 encoded text of the second ``<td>`` cell in a row.

    ``row_name`` is a parsed table row: any object whose ``findAll('td')``
    returns a sequence of cells that each expose a ``text`` attribute.

    Raises IndexError when the row contains fewer than two ``<td>`` cells.
    """
    target_cell = row_name.findAll('td')
    # Index 1 is the second cell; the first cell is never read here.
    target_data = target_cell[1].text.encode('utf-8')
    return target_data
# Script entry point: run the scraper only when executed directly.
# NOTE(review): the URL argument is an empty string here -- presumably the
# real target page address belongs in its place; confirm before running.
if __name__ == "__main__":
    gather_main_content('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment