Skip to content

Instantly share code, notes, and snippets.

@fzls
Last active April 12, 2017 09:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fzls/88d362e09d9d46f4526c398b2023a57a to your computer and use it in GitHub Desktop.
Save fzls/88d362e09d9d46f4526c398b2023a57a to your computer and use it in GitHub Desktop.
import inspect
import json
import logging
import logging.handlers
import sys
import time

import requests
### Logging configuration
# Date-time format string (not referenced by the formatter below).
DATE_TIME_FORMAT = '%Y-%m-%d_%H-%M-%S'
logFormatter = logging.Formatter('%(asctime)s %(levelname)s [line:%(lineno)d] %(message)s')
log = logging.getLogger(__name__)
# Without an explicit level the logger inherits the root logger's default
# (WARNING), so DEBUG/INFO records would be discarded before any handler sees
# them. Let everything through here and filter per handler instead.
log.setLevel(logging.DEBUG)

# Full log: every record from DEBUG up, rotated at 5 MiB with 7 backups kept.
fileHandler = logging.handlers.RotatingFileHandler(
    "full_logs.log", maxBytes=(1048576 * 5), backupCount=7, encoding='utf-8')
fileHandler.setFormatter(logFormatter)
fileHandler.setLevel(logging.DEBUG)
log.addHandler(fileHandler)

# Console: only ERROR and above are echoed to stdout.
consoleHandler = logging.StreamHandler(sys.stdout)
consoleHandler.setFormatter(logFormatter)
consoleHandler.setLevel(logging.ERROR)
log.addHandler(consoleHandler)
def eye_catching_logging(msg='', logger=log.info):
    """Emit ``msg`` as a banner: title-cased and framed by 50 dashes on each side.

    :param msg: anything convertible with ``str()``; it is title-cased before output.
    :param logger: callable that receives the finished banner string
        (defaults to ``log.info``).
    """
    rule = '-' * 50
    banner = ' '.join((rule, str(msg).title(), rule))
    logger(banner)


# Expose the helper as an attribute of the logger object so callers can write
# ``log.eye_catching_logging(...)``.
log.eye_catching_logging = eye_catching_logging
def list_debug(l: list):
    """Log each element of ``l`` at DEBUG level, framed by banner lines.

    A first banner reports the line number of the caller, then a start banner,
    one DEBUG record per element, and an end banner. The banners are emitted
    through ``log.eye_catching_logging``.

    :param l: the elements to dump. Any iterable is accepted; it is
        materialised once so that its size can be reported.
    """
    # Snapshot the input so len() works for generators and other non-sized iterables.
    items = list(l)
    # stack()[0] is this function; stack()[1] is the caller, and index 2 of a
    # frame record is its line number.
    line_number = inspect.stack()[1][2]
    log.eye_catching_logging('called from [line:%s]' % (line_number))
    suffix = 'OF PRINTING LIST with size of [{length}]'.format(length=len(items))
    log.eye_catching_logging('start ' + suffix)
    for item in items:
        log.debug(item)
    log.eye_catching_logging('end ' + suffix)


# Expose the helper as ``log.list_debug(...)``.
log.list_debug = list_debug
def json_debug(var):
    """Log ``var`` at DEBUG level as pretty-printed JSON.

    A banner with the caller's line number is emitted first (through
    ``log.eye_catching_logging``), followed by the JSON text with a 2-space
    indent and non-ASCII characters left unescaped.

    :param var: the value to dump. Values the ``json`` module cannot serialise
        are rendered with ``repr()`` rather than raising ``TypeError``.
    """
    # stack()[0] is this function; stack()[1] is the caller, and index 2 of a
    # frame record is its line number.
    line_number = inspect.stack()[1][2]
    log.eye_catching_logging('called from [line:%s]' % (line_number))
    # default=repr is deliberate: this is a debugging aid, so an exotic object
    # inside ``var`` should show up in the log instead of crashing the program.
    log.debug(json.dumps(var, ensure_ascii=False, indent=2, default=repr))


# Expose the helper as ``log.json_debug(...)``.
log.json_debug = json_debug
if __name__ == '__main__':
    # Probe every restaurant id in [start_id, end_id] on the ele.me REST API
    # and write the ones that exist to ``valid_shops_start_at_<start_id>.json``.
    #
    # NOTE: log.error is used for progress messages because the console handler
    # only shows ERROR and above.
    api = 'https://mainsite-restapi.ele.me/shopping/restaurant/{id}?latitude=30.262373&longitude=120.12105'
    # Seconds to wait for a response before giving up on one id; without a
    # timeout a stalled connection would block the crawl forever.
    request_timeout = 10

    # split() without an argument tolerates repeated/leading/trailing whitespace.
    start_id, end_id = map(int, input('enter the start_id and end_id(like 1111 2345) : ').split())
    shop_id = start_id
    valid_shops = []
    start_at = time.time()
    loop_start_at = time.time()
    try:
        # The context manager closes the pooled connections when the crawl ends.
        with requests.Session() as _session:
            while shop_id <= end_id:
                prefix = '[%09d] : ' % shop_id
                try:
                    res = _session.get(api.format(id=shop_id), timeout=request_timeout).json()  # type: dict
                except (requests.RequestException, ValueError) as err:
                    # Network failure or a non-JSON body: skip this id instead of
                    # aborting the whole crawl.
                    log.error(prefix + 'request failed, skipping: %s' % err)
                    res = None

                if isinstance(res, dict) and res.get('name') != 'RESTAURANT_NOT_FOUND':
                    # Idea (not implemented): enrich each shop with geographic
                    # info by reverse-geocoding its coordinates, e.g.
                    # http://api.map.baidu.com/geocoder/v2/?output=json&ak=<YOUR_AK>&location=30.271933,120.1195
                    # whose response carries result.formatted_address,
                    # result.addressComponent and result.cityCode.
                    shop = {
                        'id': shop_id,
                        'name': res.get('name'),
                        'address': res.get('address'),
                        'latitude': res.get('latitude'),
                        'longitude': res.get('longitude'),
                    }
                    log.error(prefix + json.dumps(shop))
                    valid_shops.append(shop)

                # Spacing requests 0.1s apart is just enough to avoid being banned.
                time.sleep(0.1)
                shop_id += 1
                if shop_id % 100 == 0:
                    log.error('got 100 data, now we have [%8d/%8d] data, current id is [%9d], with id started at %9d, this loop used [%8.2f]s, total running %8.2fs'%(len(valid_shops), shop_id-start_id, shop_id, start_id, time.time() - loop_start_at,time.time()-start_at))
                    loop_start_at = time.time()
    finally:
        # Export in ``finally`` so that an interruption (Ctrl-C, unexpected
        # error) still saves whatever has been collected so far.
        with open('valid_shops_start_at_{start_id}.json'.format(start_id=start_id), 'w', encoding='utf-8') as save:
            log.error('exporting...')
            json.dump(valid_shops, save, indent=2)
            log.error('exported')
    log.error('finished')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment