Skip to content

Instantly share code, notes, and snippets.

@pandada8
Created July 5, 2018 04:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pandada8/5a8a49f23878f4c20914479933bfcc5e to your computer and use it in GitHub Desktop.
Save pandada8/5a8a49f23878f4c20914479933bfcc5e to your computer and use it in GitHub Desktop.
简单的自如爬虫
import pymongo
import requests
import sys
# MongoDB collection that stores one document per crawled room
# (database "ziroom", collection "room", default client settings).
col = pymongo.MongoClient()["ziroom"]["room"]

# One shared HTTP session for all requests; every request carries the
# mobile-browser User-Agent configured below.
r = requests.Session()
r.headers.update({
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
})
def get_list_of_room(name, city_code="110000", timeout=10):
    """Yield every room entry the list endpoint returns for a keyword search.

    Pages are requested one at a time, starting at page 1, until a page
    comes back with no rooms.

    Args:
        name: Search keyword, sent as the ``keywords`` query parameter.
        city_code: Value of the ``city_code`` query parameter. Defaults to
            "110000", the value that used to be hard-coded here.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Yields:
        Each element of ``data['rooms']`` from every non-empty page, in the
        order the endpoint returned them.

    Raises:
        requests.exceptions.HTTPError: A page request returned an error
            status.
        requests.exceptions.Timeout: A page did not answer within
            ``timeout`` seconds.
        KeyError: The JSON payload lacks the ``data`` or ``rooms`` key.
    """
    page = 1
    while True:
        response = r.get(
            "http://m.ziroom.com/v7/room/list.json",
            params={"city_code": city_code, "page": page, "keywords": name},
            timeout=timeout,
        )
        # Surface HTTP failures here rather than as a confusing JSON or
        # KeyError further down.
        response.raise_for_status()
        rooms = response.json()['data']['rooms']
        if not rooms:
            # An empty (or null) page marks the end of the result set.
            break
        yield from rooms
        page += 1
def fetch_detail(id, timeout=10):
    """Download one room's detail record and store it in MongoDB.

    Does nothing when a document with ``_id == id`` is already stored, so
    re-running the crawler does not re-download known rooms.

    Args:
        id: Room identifier, as found in the ``id`` field of list entries.
            (The name shadows the builtin but is kept so existing callers
            keep working.)
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Raises:
        requests.exceptions.HTTPError: The detail request returned an error
            status.
        requests.exceptions.Timeout: The endpoint did not answer within
            ``timeout`` seconds.
        KeyError: The payload lacks one of the fields read below.
    """
    if col.find_one({"_id": id}) is not None:
        return
    response = r.get(
        'http://m.ziroom.com/v7/room/detail.json',
        params={'city_code': "110000", 'id': id},
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()['data']
    # Reuse the room's own id as the MongoDB primary key so the find_one
    # check above can recognise already-stored rooms.
    data['_id'] = data['id']
    # Collection.insert() is deprecated and was removed in PyMongo 4;
    # insert_one() is the supported single-document equivalent.
    col.insert_one(data)
    print(data['subway_primary'], data['price'], data['price_unit'], data['id'])
def main():
    """Crawl all rooms matching the keyword given on the command line.

    The first command-line argument is the search keyword. Every room in
    the listing is passed to ``fetch_detail`` by its ``id``.

    Raises:
        SystemExit: No keyword argument was supplied; a usage message is
            written to stderr and the exit status is non-zero.
    """
    if len(sys.argv) < 2:
        # A usage line is more helpful than the bare IndexError that
        # sys.argv[1] would raise.
        sys.exit("usage: {} KEYWORD".format(sys.argv[0]))
    for room in get_list_of_room(sys.argv[1]):
        fetch_detail(room['id'])
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment