Skip to content

Instantly share code, notes, and snippets.

@zxyle
Created March 9, 2018 09:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zxyle/928a62cc6cbe352e44fd2dfaa1c6bee4 to your computer and use it in GitHub Desktop.
Save zxyle/928a62cc6cbe352e44fd2dfaa1c6bee4 to your computer and use it in GitHub Desktop.
姚沿的手机号归属地抓爬程序
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Zheng <zhengxiang@upg.cn>
# 采集网站名称: 姚沿-手机号归属地抓爬
# 主页url: http://www.ip138.com/sj/
# 采集策略:
from urllib.parse import quote_plus
import pymongo
import requests
from parsel import Selector
# Connect to the MongoDB collection that receives the scraped records.
# NOTE(review): credentials and host are hard-coded in source; consider moving
# them to configuration or environment variables.
uri = "mongodb://%s:%s@%s" % (quote_plus("upgadmin"), quote_plus("upg@123"), "172.16.2.202")
client = pymongo.MongoClient(uri)
db = client["spider"]
post = db["phone_area"]

# Seconds to wait for the site before giving up on a single lookup; without a
# timeout one stalled connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 10

# Result-table row index -> name of the stored field.  Each value is read from
# the text of the second cell of that row in the page's second <table>.
FIELD_ROWS = {
    3: "area",       # home location of the number
    4: "card_type",  # card type
    5: "area_code",  # area code
    6: "post_code",  # postal code
}

# Query every 7-digit prefix "166xxxx" and store one document per prefix.
# NOTE(review): the range starts at 1, so the prefix "1660000" is never
# queried -- confirm whether that is intentional.
for i in range(1, 10000):
    phone_number = f"166{i:04d}"
    url = f"http://www.ip138.com:8080/search.asp?action=mobile&mobile={phone_number}"
    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # A single failed lookup should not abort the remaining numbers.
        print(f"request failed for {phone_number}: {exc}")
        continue

    # The response is decoded as GBK; undecodable bytes are replaced instead
    # of raising so that one malformed page cannot stop the crawl.
    sel = Selector(resp.content.decode('gbk', errors='replace'))

    data = {"phone": phone_number}
    for row, field in FIELD_ROWS.items():
        # extract_first() yields None when the cell is missing.
        data[field] = sel.xpath(f'//table[2]/tr[{row}]/td[2]/text()').extract_first()
    post.insert_one(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment