Last active
April 12, 2017 09:51
-
-
Save fzls/88d362e09d9d46f4526c398b2023a57a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect
import json
import logging
import logging.handlers
import sys
import time

import requests
### Logging configuration
# Timestamp layout constant; it is not passed to the formatter below.
DATE_TIME_FORMAT = '%Y-%m-%d_%H-%M-%S'
logFormatter = logging.Formatter('%(asctime)s %(levelname)s [line:%(lineno)d] %(message)s')

log = logging.getLogger(__name__)
# Without an explicit level the logger inherits WARNING from the root logger,
# so DEBUG/INFO records would be dropped before any handler sees them and the
# DEBUG-level file handler below could never capture the full log.
log.setLevel(logging.DEBUG)

# File handler: records everything (DEBUG and up), rotating at 5 MiB and
# keeping 7 backup files.
fileHandler = logging.handlers.RotatingFileHandler(
    "full_logs.log", maxBytes=(1048576 * 5), backupCount=7, encoding='utf-8')
# Non-rotating alternative:
# fileHandler = logging.FileHandler("full_logs.log", encoding='utf-8')
fileHandler.setFormatter(logFormatter)
fileHandler.setLevel(logging.DEBUG)
log.addHandler(fileHandler)

# Console handler: only ERROR and above are echoed to stdout.
consoleHandler = logging.StreamHandler(sys.stdout)
consoleHandler.setFormatter(logFormatter)
consoleHandler.setLevel(logging.ERROR)
log.addHandler(consoleHandler)
def eye_catching_logging(msg='', logger=log.info):
    """Emit *msg* as a banner line so it stands out in the log.

    The message is converted with ``str()``, title-cased, and surrounded by
    a run of 50 dashes on each side before being handed to *logger*.

    Args:
        msg: Anything printable; defaults to an empty string.
        logger: Callable that receives the finished line (``log.info`` by
            default).
    """
    rule = '-' * 50
    banner = ' '.join((rule, str(msg).title(), rule))
    logger(banner)


# Expose the helper as an attribute of the module logger.
log.eye_catching_logging = eye_catching_logging
def list_debug(l: list):
    """Log every element of *l* at DEBUG level between two banner lines.

    A first banner records the line number of the caller; the start and end
    banners include the number of elements.

    Args:
        l: The values to dump. Any iterable is accepted; it is materialized
            once so that its size can be reported.
    """
    # context=0 skips reading source lines for every frame; only the caller's
    # line number is needed.
    line_number = inspect.stack(context=0)[1].lineno
    log.eye_catching_logging('called from [line:%s]' % line_number)

    # Materialize so generators and other one-shot iterables work with len().
    items = list(l)
    posfix = 'OF PRINTING LIST with size of [{length}]'.format(length=len(items))
    log.eye_catching_logging('{position} {posfix}'.format(position='start', posfix=posfix))
    for v in items:
        log.debug(v)
    log.eye_catching_logging('{position} {posfix}'.format(position='end', posfix=posfix))


# Expose the helper as an attribute of the module logger.
log.list_debug = list_debug
def json_debug(var):
    """Log *var* as indented JSON at DEBUG level.

    A banner with the caller's line number is logged first, followed by the
    JSON text (non-ASCII characters are written unescaped).

    Args:
        var: The value to dump. Values the ``json`` module cannot serialize
            are rendered with ``repr()`` instead of raising ``TypeError``,
            so a debugging call cannot abort the program.
    """
    # context=0 skips reading source lines for every frame; only the caller's
    # line number is needed.
    line_number = inspect.stack(context=0)[1].lineno
    log.eye_catching_logging('called from [line:%s]' % line_number)
    log.debug(json.dumps(var, ensure_ascii=False, indent=2, default=repr))


# Expose the helper as an attribute of the module logger.
log.json_debug = json_debug
if __name__ == '__main__':
    # Query the restaurant endpoint for every id in a user-supplied inclusive
    # range, collect the ids that resolve to a restaurant, and dump them to a
    # JSON file named after the starting id.
    # NOTE(review): the original indentation was lost; the export is assumed
    # to run once after the crawl loop -- confirm against the author's copy.
    api = 'https://mainsite-restapi.ele.me/shopping/restaurant/{id}?latitude=30.262373&longitude=120.12105'
    # Seconds to wait for a response; without a timeout a stalled connection
    # would block the crawl indefinitely.
    request_timeout = 30

    # split() with no argument tolerates repeated, leading, or trailing
    # whitespace in the answer, unlike split(' ').
    start_id, end_id = map(int, input('enter the start_id and end_id(like 1111 2345) : ').split())

    shop_id = start_id  # named shop_id to avoid shadowing the builtin id()
    valid_shops = []
    _session = requests.session()
    start_at = time.time()
    loop_start_at = time.time()

    while shop_id <= end_id:
        prefix = '[%09d] : ' % shop_id
        res = _session.get(api.format(id=shop_id), timeout=request_timeout).json()  # type: dict
        # Responses whose 'name' is RESTAURANT_NOT_FOUND are skipped silently.
        if res.get('name') != 'RESTAURANT_NOT_FOUND':
            # Idea (not implemented): add geographic info from the coordinates via
            # http://api.map.baidu.com/geocoder/v2/?output=json&ak=Eze6dPlb3bnUrihPNaaKljdUosb4G41B&location=30.271933,120.1195
            # Sample response (abridged):
            # {"status":0,"result":{"location":{...},"addressComponent":{...},"cityCode":179}}
            shop = {
                'id': shop_id,
                'name': res.get('name'),
                'address': res.get('address'),
                'latitude': res.get('latitude'),
                'longitude': res.get('longitude'),
                # 'city_code': _session.get(baidu_api).json()['result']['cityCode'],
            }
            # Logged at ERROR so the line passes the console handler's level.
            log.error(prefix + json.dumps(shop))
            valid_shops.append(shop)

        # Author's note: 0.1s between requests is just enough to avoid being banned.
        time.sleep(0.1)
        shop_id += 1

        # Progress report each time the next id is a multiple of 100.
        if shop_id % 100 == 0:
            log.error('got 100 data, now we have [%8d/%8d] data, current id is [%9d], with id started at %9d, this loop used [%8.2f]s, total running %8.2fs' % (
                len(valid_shops), shop_id - start_id, shop_id, start_id,
                time.time() - loop_start_at, time.time() - start_at))
            loop_start_at = time.time()

    with open('valid_shops_start_at_{start_id}.json'.format(start_id=start_id), 'w', encoding='utf-8') as save:
        log.error('exporting...')
        json.dump(valid_shops, save, indent=2)
        log.error('exported')

    log.error('finished')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment