Skip to content

Instantly share code, notes, and snippets.

@luzihang123
Created June 3, 2020 11:09
Show Gist options
  • Save luzihang123/bf5195cbf9a80f672246d2b8739550a4 to your computer and use it in GitHub Desktop.
Save luzihang123/bf5195cbf9a80f672246d2b8739550a4 to your computer and use it in GitHub Desktop.
# pyppeteer访问国家企业信用,渲染页面 (use pyppeteer to open National Enterprise Credit pages and render them)
import asyncio
from pyppeteer import launch
import jsonpath
import random
from scrapy.selector import Selector
from pyquery import PyQuery as pq
# Hard-coded input data for this script: a dict with a single "data" key holding
# seven notice records (business-licence invalidation notices on gsxt.gov.cn,
# judging by the titles and URLs).
#
# Notes on the record layout, as visible in the literal below:
#   * "detail_link" is the field selected by the '$..detail_link' JSONPath in main();
#     none of the other fields are read by the code in this file.
#   * "is_copy" and "notice_content" use the *string* "null" (not None) as a placeholder.
#   * "raw_search" and "raw_detail" are strings containing Python-repr-style dict text
#     (single-quoted keys, None), not nested dicts and not valid JSON.
#   * Records are not ordered by "id".
merchant_list = {
"data":
[
{
"id": 2,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "太仓永盛表面科技有限公司",
"notice_title": "太仓永盛表面科技有限公司营业执照作废声明",
"notice_date": "2018-10-22",
"reg_org_cn_license": "太仓市市场监督管理局",
"is_copy": "null",
"notice_content": "本企业不慎将营业执照副本丢失,特此声明作废。",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=252c18920017dde4ce0b1961ac6625dc&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=320500000000029569297&nodeNum=320000&entType=1",
"raw_search": "{'noticeId': '252c18920017dde4ce0b1961ac6625dc', 'noticeTitle': '太仓永盛表面科技有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '320500000000029569297', 'judAuth': '320585', 'judAuth_CN': '太仓市市场监督管理局', 'judDate': None, 'noticeDateStr': '2018/10/22 00:00:00', 'noticeDate': '2018-10-22', 'noticeType': '17', 'nodenum': '320000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 1, 'recordsFiltered': 1, 'perPage': 10, 'start': 0, 'data': [{'metaDataName': '', 'iD': None, 'subjectID': None, 'recId': '4135', 'pripId': '320500000000029569297', 'entName': '太仓永盛表面科技有限公司', 'uniscId': '913205856811441212', 'regorg': '320585', 'regorgCn': '太仓市市场监督管理局', 'isoricop': '3', 'liccanSta': '本企业不慎将营业执照副本丢失,特此声明作废。', 'liccanDate': '2018-10-22', 'isreilic': '2', 'reiDate': None, 'liccopyNo': '913205856811441212', 'nodeNum': '320000', 'sExtSequence': ''}], 'error': '', 'currentPage': 0, 'totalPage': 1}",
"duplicate_key": "71e03e78bfc587fcb16c6354b6fede40",
"created_at": "2020-06-02 20:04:14",
"updated_at": "2020-06-02 20:04:14"
},
{
"id": 1,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "定兴县雨锦箱包销售有限公司",
"notice_title": "定兴县雨锦箱包销售有限公司营业执照作废声明",
"notice_date": "2019-06-15",
"reg_org_cn_license": "定兴县市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=50a85a6bc32aebf52d35bb6acdf9fe41&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=130626000022017050800049&nodeNum=130000&entType=1",
"raw_search": "{'noticeId': '50a85a6bc32aebf52d35bb6acdf9fe41', 'noticeTitle': '定兴县雨锦箱包销售有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '130626000022017050800049', 'judAuth': '130626', 'judAuth_CN': '定兴县市场监督管理局', 'judDate': None, 'noticeDateStr': '2019/06/15 11:16:46', 'noticeDate': '2019-06-15', 'noticeType': '17', 'nodenum': '130000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "00e07b8703e85fd5a253a31a4b158ac8",
"created_at": "2020-06-02 20:03:59",
"updated_at": "2020-06-02 20:03:59"
},
{
"id": 6,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "洛阳尊磐商贸有限公司",
"notice_title": "洛阳尊磐商贸有限公司营业执照作废声明",
"notice_date": "2020-04-26",
"reg_org_cn_license": "伊川县市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=84032015fb701af0c693c2d1b2f65363&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=141000024190925972&nodeNum=410000&entType=1",
"raw_search": "{'noticeId': '84032015fb701af0c693c2d1b2f65363', 'noticeTitle': '洛阳尊磐商贸有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '141000024190925972', 'judAuth': '410329', 'judAuth_CN': '伊川县市场监督管理局', 'judDate': None, 'noticeDateStr': '2020/04/26 12:43:00', 'noticeDate': '2020-04-26', 'noticeType': '17', 'nodenum': '410000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "dae16c44ca0125cbd6f1198f64f8c673",
"created_at": "2020-06-02 20:05:03",
"updated_at": "2020-06-02 20:05:03"
},
{
"id": 3,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "无锡市荣原机械制造有限公司",
"notice_title": "无锡市荣原机械制造有限公司营业执照作废声明",
"notice_date": "2020-05-19",
"reg_org_cn_license": "无锡市惠山区市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=b0736b6dff6d0ed0539113ca30b55b83&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=320200000000099174171&nodeNum=320000&entType=1",
"raw_search": "{'noticeId': 'b0736b6dff6d0ed0539113ca30b55b83', 'noticeTitle': '无锡市荣原机械制造有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '320200000000099174171', 'judAuth': '320206', 'judAuth_CN': '无锡市惠山区市场监督管理局', 'judDate': None, 'noticeDateStr': '2020/05/19 00:00:00', 'noticeDate': '2020-05-19', 'noticeType': '17', 'nodenum': '320000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "1291efda3fd9995cb71578d6a160a3a4",
"created_at": "2020-06-02 20:04:17",
"updated_at": "2020-06-02 20:04:17"
},
{
"id": 5,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "苏州帮帮客汽车科技有限公司",
"notice_title": "苏州帮帮客汽车科技有限公司营业执照作废声明",
"notice_date": "2019-12-17",
"reg_org_cn_license": "苏州市姑苏区市场监督管理局苏州国家历史文化名城保护区市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=c7acd2237b56eb1030cb30ad2b25a4ac&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=320500000000109979191&nodeNum=320000&entType=1",
"raw_search": "{'noticeId': 'c7acd2237b56eb1030cb30ad2b25a4ac', 'noticeTitle': '苏州帮帮客汽车科技有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '320500000000109979191', 'judAuth': '320508', 'judAuth_CN': '苏州市姑苏区市场监督管理局苏州国家历史文化名城保护区市场监督管理局', 'judDate': None, 'noticeDateStr': '2019/12/17 00:00:00', 'noticeDate': '2019-12-17', 'noticeType': '17', 'nodenum': '320000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "70fcf0f557c91c97699dba4f980381b6",
"created_at": "2020-06-02 20:04:37",
"updated_at": "2020-06-02 20:04:37"
},
{
"id": 4,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "上海发元物业管理服务有限公司",
"notice_title": "上海发元物业管理服务有限公司营业执照作废声明",
"notice_date": "2020-05-28",
"reg_org_cn_license": "奉贤区市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=dd8d0cc20d8bdd35fc19689141d07fda&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=310120000022015111300660&nodeNum=310000&entType=1",
"raw_search": "{'noticeId': 'dd8d0cc20d8bdd35fc19689141d07fda', 'noticeTitle': '上海发元物业管理服务有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '310120000022015111300660', 'judAuth': '310120', 'judAuth_CN': '奉贤区市场监督管理局', 'judDate': None, 'noticeDateStr': '2020/05/28 17:58:23', 'noticeDate': '2020-05-28', 'noticeType': '17', 'nodenum': '310000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "580fd52bd931734a8ffe948fcdf7b67a",
"created_at": "2020-06-02 20:04:34",
"updated_at": "2020-06-02 20:04:34"
},
{
"id": 7,
"source": "国家企业信用信息公示系统",
"snapshot_date": "2020-06-02 00:00:00",
"merchant": "定兴县金宁箱包销售有限公司",
"notice_title": "定兴县金宁箱包销售有限公司营业执照作废声明",
"notice_date": "2019-06-15",
"reg_org_cn_license": "定兴县市场监督管理局",
"is_copy": "null",
"notice_content": "null",
"detail_link": "http://www.gsxt.gov.cn/affiche-query-area-info-getElicenseNullfynoView.html?noticeId=ef147588e54e61371e1fe232a2dd5008&areaId=100000",
"detail_api": "http://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-getElicenseNullfy.html?pripId=130626000022017052400099&nodeNum=130000&entType=1",
"raw_search": "{'noticeId': 'ef147588e54e61371e1fe232a2dd5008', 'noticeTitle': '定兴县金宁箱包销售有限公司营业执照作废声明', 'noticeNO': None, 'noticeContent': '130626000022017052400099', 'judAuth': '130626', 'judAuth_CN': '定兴县市场监督管理局', 'judDate': None, 'noticeDateStr': '2019/06/15 11:29:41', 'noticeDate': '2019-06-15', 'noticeType': '17', 'nodenum': '130000', 'lastModifiedTime': None, 'datafrom': None, 'entName': None, 'simpleCancelUrl': None}",
"raw_detail": "{'draw': 0, 'recordsTotal': 0, 'recordsFiltered': 0, 'perPage': 10, 'start': 0, 'data': [], 'error': '', 'currentPage': 0, 'totalPage': 0}",
"duplicate_key": "16d9729b58a34293f6e57fe9a1451688",
"created_at": "2020-06-02 20:05:32",
"updated_at": "2020-06-02 20:05:32"
}
]
}
async def fetch_async(url):
    """Open ``url`` in a freshly launched browser and print its ``.item_sc`` nodes.

    Each call launches its own visible (``headless=False``) pyppeteer browser,
    navigates to ``url``, waits a random 10-12 seconds, then parses the page
    HTML with PyQuery and prints the parsed document's type followed by the
    ``.item_sc`` selection.

    Args:
        url: Address of the page to load.

    Returns:
        None. The results are only printed.

    Raises:
        Whatever pyppeteer raises while launching, navigating or reading the
        page; the browser is still closed before the exception propagates.
    """
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto(url)
        # Pause before reading the HTML; presumably this gives the page's
        # scripts time to finish rendering (TODO confirm the needed delay).
        await asyncio.sleep(random.randint(10, 12))
        doc = pq(await page.content())
        print(type(doc))
        print(f" {doc('.item_sc')}")
    finally:
        # Always shut the browser down so a failed navigation or parse does
        # not leave an orphaned browser process behind.
        await browser.close()
async def main():
    """Fetch every ``detail_link`` found in ``merchant_list`` concurrently.

    One ``fetch_async`` coroutine is created per URL and all of them are run
    together with ``asyncio.gather``, so every page is requested at once. The
    original author labelled this the "access frequency too high" variant.

    The "normal" variant processes the URLs one at a time instead:

        for url in urls:
            await fetch_async(url)

    Returns:
        None.
    """
    # jsonpath.jsonpath() returns False (not an empty list) when the
    # expression matches nothing; fall back to [] so that case is a no-op
    # instead of a TypeError from iterating over a bool.
    urls = jsonpath.jsonpath(merchant_list, '$..detail_link') or []
    await asyncio.gather(*(fetch_async(url) for url in urls))
# Script entry: obtain the current event loop and drive main() to completion.
event_loop = asyncio.get_event_loop()
event_loop.run_until_complete(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment