Skip to content

Instantly share code, notes, and snippets.

View bahrammp's full-sized avatar

Bahram M. Esfahani bahrammp

  • Berlin, Germany
View GitHub Profile
@bahrammp
bahrammp / a.py
Last active August 6, 2018 16:38
import urllib3
import json
from pymongo import MongoClient
urllib3.disable_warnings()
from bs4 import BeautifulSoup
counter = 0


def get_restaurants_list(page=1):
    """Fetch one SnappFood restaurant listing page and find its title blocks.

    Args:
        page: Page number substituted into the listing URL's ``page``
            query parameter.

    NOTE(review): only the opening statements of this function are
    visible; the rest (presumably filling and returning ``res``) is cut
    off in this view, so nothing beyond the visible statements is
    reproduced here. TODO confirm against the full file.
    """
    global counter
    http = urllib3.PoolManager()
    base_api_url = 'https://www.snappfood.ir/restaurant/?page={}'
    r = http.request('GET', base_api_url.format(page))
    soup = BeautifulSoup(r.data.decode('utf-8'), 'lxml')
    # Every <div class="kk-pp-title"> element on the listing page.
    restaurant_links = soup.find_all('div', class_='kk-pp-title')
    res = list()
from multiprocessing.dummy import Pool as ThreadPool

# Run get_restaurants_list over pages 1..287 on a pool of two workers.
pool = ThreadPool(2)
try:
    restaurant_urls = pool.map(get_restaurants_list, range(1, 288))
finally:
    # Always shut the pool down, even when one of the page fetches raised.
    pool.close()
    pool.join()
print(restaurant_urls[0])  # each item in restaurant_urls has 12 links
# Write every link on its own line; restaurant_urls is iterated as a
# sequence of sequences of strings. The context manager closes the file
# even if a write fails part-way through.
with open('restaurants_links.txt', 'w+') as f:
    for page_links in restaurant_urls:
        for link in page_links:
            f.write(link)
            f.write('\n')
restaurant_vendor_name = list()
# Reload the saved links. Each entry keeps its trailing newline exactly as
# read from the file; the context manager guarantees the handle is closed.
with open('restaurants_links.txt') as f:
    restaurant_urls = list(f)
for i in restaurant_urls:
@bahrammp
bahrammp / a.py
Last active August 6, 2018 16:52
def get_comments_for_restaurant_page(restaurant_name, page_number=0):
    """Fetch one page of comments for a restaurant from the SnappFood site.

    Args:
        restaurant_name: Value substituted into the vendor segment of the
            comments URL.
        page_number: Value substituted into the trailing page segment of
            the comments URL (defaults to 0).

    Returns:
        The decoded JSON payload when its ``status`` field equals ``True``;
        otherwise ``-1``, after printing an error message.
    """
    http = urllib3.PoolManager()
    base_api_url = 'https://www.snappfood.ir/restaurant/comment/vendor/{}/{}'
    r = http.request('GET', base_api_url.format(restaurant_name, page_number))
    json_data = json.loads(r.data.decode('utf-8'))
    # Equality (not identity) is kept on purpose so the accepted set of
    # "status" values stays exactly what existing callers rely on.
    if json_data['status'] != True:  # noqa: E712
        print('error in loading restaurant information ', restaurant_name)
        return -1
    return json_data
# Connect to the MongoDB server on localhost:27017 and select the "zdf"
# database (subscript access is equivalent to attribute access).
client = MongoClient("mongodb://localhost:27017")
db = client["zdf"]