Skip to content

Instantly share code, notes, and snippets.

@rishimukherjee
Created December 7, 2012 20:28
Show Gist options
  • Save rishimukherjee/4236263 to your computer and use it in GitHub Desktop.
Save rishimukherjee/4236263 to your computer and use it in GitHub Desktop.
Flipkart Scraper
import bs4
import re
import urllib2
import sys
# Flipkart search scraper (Python 2 script, uses urllib2).
#
# Usage: pass the search terms as command-line arguments. The script fetches
# the Flipkart search page for those terms and prints a dict with the title,
# price and details of the first result, or "Item Not Found" when the page
# contains no result block.
#
# NOTE(review): the CSS class names used below are taken as-is from the
# original script; they are presumably tied to Flipkart's markup at the time
# it was written -- confirm against the live site.

SEARCH_URL = 'http://www.flipkart.com/search/a/all?query={0}&vertical=all&dd=0&autosuggest[as]=off&autosuggest[as-submittype]=entered&autosuggest[as-grouprank]=0&autosuggest[as-overallrank]=0&autosuggest[orig-query]=&autosuggest[as-shown]=off&Search=%C2%A0&otracker=start&_r=YSWdYULYzr4VBYklfpZRbw--&_l=pMHn9vNCOBi05LKC_PwHFQ--&ref=a2c6fadc-2e24-4412-be6a-ce02c9707310&selmitem=All+Categories'


def build_search_url(words):
    """Return the search URL for the given list of search words.

    The words are joined with spaces and URL-encoded. Plain words are still
    separated by ``+`` exactly as before, but characters such as ``&``,
    ``+`` or ``#`` inside a word can no longer corrupt the query string.
    """
    # Imported locally so the file's top-level import block stays untouched.
    try:
        from urllib import quote_plus  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3
    return SEARCH_URL.format(quote_plus(' '.join(words)))


def parse_price(text):
    """Return the first whole number in *text* as an int.

    Thousands separators are removed first, so ``"Rs. 12,999"`` yields
    ``12999`` rather than ``12``. Returns ``0`` when *text* is empty,
    ``None`` or contains no digits.
    """
    if not text:
        return 0
    match = re.search(r'\d+', text.replace(',', ''))
    return int(match.group()) if match else 0


def fetch_page(url):
    """Download *url* and return the raw response body."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers in Python 2.
        response.close()


def extract_item(first_block):
    """Build the result dict from the first search-result block.

    Returns a dict with the keys ``'title'``, ``'price'`` and ``'details'``.
    Any element missing from the block leaves the corresponding default
    (``''``, ``0`` and ``'No Details Available'``) in place instead of
    raising.
    """
    value_table = {'title': '', 'price': 0, 'details': 'No Details Available'}

    title_link = first_block.find(
        'a', attrs={'class': 'title tpadding5 fk-anchor-link'})
    if title_link is not None:
        value_table['title'] = title_link.get('title', '')

    price_span = first_block.find('span', attrs={'class': 'price'})
    if price_span is not None:
        # get_text() also works when the span wraps nested tags, where
        # .string would be None.
        value_table['price'] = parse_price(price_span.get_text())

    detail_list = first_block.find('ul', attrs={'class': 'fk-extra-details'})
    # Same threshold as the original script: details are only reported when
    # the list has more than one child node.
    if detail_list is not None and len(list(detail_list.children)) > 1:
        value_table['details'] = u''.join(
            detail.get_text() + u'\n'
            for detail in detail_list.findAll('span', attrs={'class': 'itm'}))

    return value_table


def main(argv):
    """Search Flipkart for the words in *argv* and print the first result.

    Returns the process exit status: ``2`` when no search terms were given,
    ``0`` otherwise (including when no item was found).
    """
    if not argv:
        sys.stderr.write('usage: %s SEARCH_TERM [SEARCH_TERM ...]\n'
                         % sys.argv[0])
        return 2

    page = fetch_page(build_search_url(argv))
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(page, 'html.parser')
    first_block = soup.find(
        'div', attrs={'class': 'size1of4 fk-medium-atom unit'})
    if first_block is None:
        print("Item Not Found")
    else:
        print(extract_item(first_block))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment