Skip to content

Instantly share code, notes, and snippets.

@jjjjeeffff
Created August 20, 2009 05:01
Show Gist options
  • Save jjjjeeffff/170850 to your computer and use it in GitHub Desktop.
Save jjjjeeffff/170850 to your computer and use it in GitHub Desktop.
from BeautifulSoup import BeautifulSoup
import urllib2
# Scrape the "newest cars" listing page and build one dict per vehicle row
# (link text, absolute link URL, image src, mileage, horsepower), then print
# the resulting list.

BASE_URL = 'http://www.modifiedcartrader.com'
LISTING_URL = BASE_URL + '/information/newestCars.aspx'
TABLE_ID = 'ctl00_ContentPlaceHolder1_ItemList1_GridView1'


def _first_content(tag, default=0):
    """Return the first child of *tag*, or *default* if *tag* is None or empty.

    ``find()`` returns None when nothing matches, and a tag may have no
    children at all; both cases fall back to *default* instead of raising.
    """
    if tag is None or not tag.contents:
        return default
    return tag.contents[0]


doc = urllib2.urlopen(LISTING_URL)
try:
    # The soup is built from the response here; close the connection once
    # parsing has returned, even if parsing fails.
    soup = BeautifulSoup(doc)
finally:
    doc.close()

table = soup.find('table', id=TABLE_ID)
if table is None:
    raise RuntimeError('listing table %r not found in %s' % (TABLE_ID, LISTING_URL))

# The first three <tr> elements are skipped (presumably header/pager rows --
# TODO confirm against the live page markup).
rows = table.findAll('tr')[3:]
vehs = []

# Only the first ten remaining rows are processed (original slice was [3:13]).
for row in rows[:10]:
    cells = row.findAll('td')
    link = row.find('a')
    href = link.get('href') if link is not None else None
    # Rows without a usable link or without the miles/hp columns cannot be
    # turned into a vehicle record; skip them rather than crash.
    if not href or len(cells) < 5:
        continue

    img = row.find('img')

    # Horsepower is the first child of the <b> inside the <span> of cell 4;
    # mileage is the first child of the <span> of cell 3. Both default to 0.
    hp_span = cells[4].find('span')
    hp_tag = hp_span.find('b') if hp_span is not None else None

    veh = {
        'link_text': _first_content(link, ''),
        # The first two characters of the href are dropped before prefixing
        # the site root (presumably a leading ".." -- TODO confirm).
        'link_href': BASE_URL + href[2:],
        'img': img.get('src') if img is not None else None,
        'miles': _first_content(cells[3].find('span')),
        'hp': _first_content(hp_tag),
    }
    vehs.append(veh)

# Parenthesised form prints the same under Python 2 for a single argument.
print(vehs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment