Skip to content

Instantly share code, notes, and snippets.

@rishimukherjee
Created December 8, 2012 09:49
Show Gist options
  • Save rishimukherjee/4239619 to your computer and use it in GitHub Desktop.
Save rishimukherjee/4239619 to your computer and use it in GitHub Desktop.
FlipKart All Mobile Scraper
import bs4
import re
import urllib2
import sys
from collections import defaultdict
# Scrape the Flipkart mobiles listing (sorted by ascending price) and write one
# "name----->price" line per item to flipKartNamePrice.txt. Results are paged
# through the ``start`` query parameter, which this script advances by 20.
pageUrl = 'http://www.flipkart.com/mobiles/pr?p[0]=sort%3Dprice_asc&sid=tyy%2C4io&layout=grid&start={0}'

# Fetch the first page once, only to read the total number of listed items.
start = 1
link = pageUrl.format(start)
response = urllib2.urlopen(link)
thePage = response.read()
response.close()
soup = bs4.BeautifulSoup(thePage)
searchCount = soup.find('div', attrs={'id': 'searchCount'})
countTag = searchCount.find('span', attrs={'class': 'items'})
totalCount = int(countTag.string)

# The ``with`` block guarantees the output file is flushed and closed even if
# a request or a parse step raises part-way through the crawl.
with open('flipKartNamePrice.txt', 'w') as itemTable:
    while start <= totalCount:
        link = pageUrl.format(start)
        response = urllib2.urlopen(link)
        # Read the page that was just fetched; re-parsing the first page's
        # HTML here would write the same items on every iteration.
        thePage = response.read()
        response.close()
        soup = bs4.BeautifulSoup(thePage)
        allItemNames = [name.string.strip() for name in soup.findAll('a', attrs={'class': 'fk-anchor-link'})]
        allItemPrice = [price.string.strip() for price in soup.findAll('span', attrs={'class': 'price final-price'})]
        # The numeric amount is taken from the second whitespace-separated
        # token of the price text (presumably "<currency> <amount>" -- confirm
        # against the live markup).
        allItemPrice = [int(price.split()[1]) for price in allItemPrice]
        # Items without a price tag get 0 so names and prices line up; a
        # non-positive difference adds nothing.
        allItemPrice.extend([0] * (len(allItemNames) - len(allItemPrice)))
        # Encode explicitly: str() on a unicode name with non-ASCII characters
        # raises UnicodeEncodeError under Python 2.
        st = u''.join(
            u'{0}----->{1}\n'.format(name, price)
            for name, price in zip(allItemNames, allItemPrice)
        ).encode('utf-8')
        itemTable.write(st)
        start += 20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment