Skip to content

Instantly share code, notes, and snippets.

@jkotra
Last active July 16, 2018 06:13
Show Gist options
  • Save jkotra/787e42a7d9b8f04f9cbf5026ed7c064b to your computer and use it in GitHub Desktop.
Save jkotra/787e42a7d9b8f04f9cbf5026ed7c064b to your computer and use it in GitHub Desktop.
Scrape from Justdial (JD)
from bs4 import BeautifulSoup
import csv
import requests
###################################
# Jagadeesh Kotra #
# hello@jagadeesh.me #
###################################
# Working buffers used by the scraping loop further down: `scrap` holds the
# characters of the number currently being decoded, `numbers` collects every
# fully decoded number.
scrap, numbers = [], []

# CSS class names that stand in for each character of a phone number.
# Digits 0-9:
(zero, one, two, three, four,
 five, six, seven, eight, nine) = (
    "icon-acb", "icon-yz", "icon-wx", "icon-vu", "icon-ts",
    "icon-rq", "icon-po", "icon-nm", "icon-lk", "icon-ji",
)
# NOTE(review): `ten` carries the same class string as `nine`.
ten = "icon-ji"
# Punctuation: "+", "(", ")" and "-".
plus, fbrac, fbrac_end, dash = "icon-dc", "icon-fe", "icon-hg", "icon-ba"
def write_to_csv(number_list, path='data.csv'):
    """Append phone numbers to a CSV file, one number per row.

    The ``+(91)-`` prefix is stripped from every entry before it is written.

    Args:
        number_list: Iterable of phone numbers; each item is converted with
            ``str()`` before writing.
        path: Destination file, opened in append mode. Defaults to
            ``data.csv`` in the current working directory.
    """
    # newline='' lets the csv module control the line endings itself.
    with open(path, 'a', newline='') as csvfile:
        # NOTE(review): a space delimiter combined with ',' as the quote
        # character looks like the two were swapped; kept unchanged so the
        # format of already-written files stays the same.
        spamwriter = csv.writer(csvfile, delimiter=' ',
                                quotechar=',', quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerows(
            [str(number).replace("+(91)-", "")] for number in number_list
        )
def icontonum(icon):
    """Translate an icon CSS class into the character it stands for.

    Args:
        icon: CSS class name to decode, e.g. the value of ``zero``.

    Returns:
        An ``int`` 0-9 for a digit icon, one of ``"+"``, ``"("``, ``")"`` or
        ``"-"`` for a punctuation icon, or ``icon`` itself, unchanged, when
        the class is not recognised.
    """
    # Exact-match lookup. The previous ``icon in zero`` style checks were
    # substring tests against the class name, so fragments such as "icon" or
    # "" were wrongly decoded as 0.
    # `ten` is intentionally absent: it shares its class string with `nine`,
    # so that branch could never be reached.
    icon_table = {
        zero: 0,
        one: 1,
        two: 2,
        three: 3,
        four: 4,
        five: 5,
        six: 6,
        seven: 7,
        eight: 8,
        nine: 9,
        plus: "+",
        fbrac: "(",
        fbrac_end: ")",
        dash: "-",
    }
    return icon_table.get(icon, icon)
# Header values common to both request flavours, split in two so that each
# final dict keeps its original key order when the pieces are merged.
_identity_headers = {
    "Host": "www.justdial.com",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0",
}
# NOTE(review): advertising "br" presumably needs a Brotli decoder installed
# for the response body to be decoded - confirm.
_negotiation_headers = {
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
}

# Headers for a regular page request.
reqhead = {
    **_identity_headers,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    **_negotiation_headers,
    "Cookie": "",  # put your cookie here!
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Cache-Control": "max-age=0",
}

# Headers for an XMLHttpRequest-style (AJAX) request.
reqhead_ajax = {
    **_identity_headers,
    "Accept": "application/json, text/javascript, */*; q=0.01",
    **_negotiation_headers,
    "Referer": "https://www.justdial.com/Hyderabad/Furniture-Showrooms/nct-10219686",
    "X-FRSC-Token": "",  # put your token here!
    "X-Requested-With": "XMLHttpRequest",
    "Cookie": "",  # put your cookie here!
    "Connection": "keep-alive",
}
# Fetch one page of listings. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops early on an HTTP error
# instead of silently parsing an error page.
response = requests.get(
    "https://www.justdial.com/Hyderabad/Function-Halls/nct-10218516/page-50",
    headers=reqhead,
    timeout=30,
)
response.raise_for_status()
page_html = response.content
###for AJAX request,uncomment these lines###
# (presumably sent with reqhead_ajax; the JSON body carries the HTML in its
# 'markup' field)
# import json
# send_ajax = json.loads(page_html)
# print(send_ajax)
# page_html = send_ajax['markup']
soup = BeautifulSoup(page_html, "lxml")

# Every <p class="contact-info"> wraps one phone number whose characters are
# rendered as nested <span> icons; the second CSS class of each inner span
# identifies the character.
for contact in soup.find_all('p', {"class": "contact-info"}):
    wrapper = contact.find('span')
    if wrapper is None:
        # No number markup inside this paragraph - nothing to decode.
        continue
    digits = []
    for glyph in wrapper.find_all('span'):
        classes = glyph.get('class') or []
        if len(classes) < 2:
            # Not an icon span (no second class to decode).
            continue
        digits.append(icontonum(classes[1]))
    # icontonum returns ints for digits, so convert before joining.
    numbers.append("".join(map(str, digits)))

print(numbers)
write_to_csv(numbers)
@imumeshk
Copy link

Only scraping 10 numbers

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment