# maliubiao/stock-report.py

## stock-report.py
"""
根据6位股票编号，下载它最近的业绩报告全文，跟踪一下公司的发展轨迹
"""

import requests
import re
import json
import sys
import pdb

def jsonp_json(text):
    """Parse the JSON payload wrapped inside a JSONP response.

    The payload is everything between the first "(" and the last ")" in
    *text*.

    Args:
        text: Raw JSONP response body, e.g. ``callback({...})``.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If no parenthesised payload is found, or (raised by
            ``json.loads``) if the payload is not valid JSON.
    """
    start = text.find("(")
    end = text.rfind(")")
    has_wrapper = start != -1 and end != -1 and start <= end
    if not has_wrapper:
        raise ValueError("bad jsonp")
    payload = text[start + 1:end]
    return json.loads(payload)

def download_pdf(pdfCode, timeout=30):
    """Download the PDF attached to an Eastmoney announcement.

    Fetches the announcement detail response, scans its text for the first
    ``pdf.dfcfw.com`` link that contains *pdfCode*, and downloads that file.

    Args:
        pdfCode: Announcement code, sent as the ``art_code`` query parameter.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The raw bytes of the PDF.

    Raises:
        IndexError: If the announcement detail contains no matching PDF link.
        requests.HTTPError: If the PDF request does not return HTTP 200.
    """
    u = f"http://np-cnotice-stock.eastmoney.com/api/content/ann?cb=jQuery1123009177686879890468_1624689075466&art_code={pdfCode}&client_source=web&page_index=1&_=1624689075467"
    detail = requests.get(u, timeout=timeout).text
    # Escape the host dots and the code so they only match literally.
    pattern = r"https://pdf\.dfcfw\.com/pdf.*?{}.*?pdf".format(re.escape(str(pdfCode)))
    match = re.search(pattern, detail)
    if match is None:
        # Same exception type the bare ``findall(...)[0]`` raised, but with a
        # message that says what was missing.
        raise IndexError(f"no PDF link found for announcement {pdfCode!r}")
    link = match.group(0)
    res = requests.get(link, timeout=timeout)
    # Explicit check instead of ``assert``, which is stripped under ``python -O``.
    if res.status_code != 200:
        raise requests.HTTPError(
            f"unexpected HTTP status {res.status_code} for {link}", response=res
        )
    return res.content


# Title fragments that select an announcement for download: annual report,
# Q1 / Q3 full report, semi-annual report, earnings forecast.
valid_report = (
    "年年度报告",
    "第一季度报告全文",
    "第三季度报告全文",
    "年半年度报告",
    "业绩预告",
)
# Title fragments that exclude an announcement even when a fragment above
# matches: "regarding", "meeting", "opinion", "summary".
invalid_report = (
    "关于",
    "会议",
    "意见",
    "摘要",
)
def any_exists(words, title):
    """Return True if at least one of *words* occurs as a substring of *title*.

    Args:
        words: Iterable of strings to look for.
        title: String to search in.

    Returns:
        True if any word is contained in *title*, otherwise False (including
        when *words* is empty).
    """
    # any() stops at the first hit instead of scanning every word.
    return any(word in title for word in words)


def _report_filename(title, notice_date):
    """Build a filesystem-safe ``<title>-<date>.pdf`` name for a report."""
    # Replace path separators and characters that are invalid on common
    # filesystems (":" was the only one handled before).
    safe_title = re.sub(r'[\\/:*?"<>|]', "-", title)
    # Keep only the part of the notice date before the first space.
    return "%s-%s.pdf" % (safe_title, notice_date.split(" ")[0])


def download(code, max_pages=19):
    """Download the periodic-report PDFs of a stock into the working directory.

    Walks the announcement listing page by page, keeps entries whose title
    matches ``valid_report`` and does not match ``invalid_report``, and saves
    each one as ``<title>-<date>.pdf``. Stops at the first empty page.

    Args:
        code: Stock code, sent as the ``stock_list`` query parameter.
        max_pages: Maximum number of listing pages (50 entries each) to scan.
    """
    for page in range(1, max_pages + 1):
        u = f"http://np-anotice-stock.eastmoney.com/api/security/ann?cb=jQuery112304961501953883929_1624688055706&sr=-1&page_size=50&page_index={page}&ann_type=A&client_source=web&stock_list={code}&f_node=0&s_node=0"
        # A timeout keeps a stalled connection from hanging the script forever.
        res = requests.get(u, timeout=30)
        data = jsonp_json(res.text)
        announcements = data["data"]["list"]
        if not announcements:
            break
        for item in announcements:
            title = item["title"]
            if not any_exists(valid_report, title) or any_exists(invalid_report, title):
                continue
            content = download_pdf(item["art_code"])
            fn = _report_filename(title, item["notice_date"])
            print(fn)
            with open(fn, "wb") as f:
                f.write(content)


if __name__ == "__main__":
    # Exit with a usage hint instead of an IndexError traceback when the
    # stock code argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <stock code>")
    download(sys.argv[1])
	"""
	根据6位股票编号，下载它最近的业绩报告全文，跟踪一下公司的发展轨迹
	"""

	import requests
	import re
	import json
	import sys
	import pdb

	def jsonp_json(text):
	    """Parse the JSON payload wrapped inside a JSONP response.

	    The payload is everything between the first "(" and the last ")" in
	    *text*.

	    Args:
	        text: Raw JSONP response body, e.g. ``callback({...})``.

	    Returns:
	        The decoded JSON value.

	    Raises:
	        ValueError: If no parenthesised payload is found, or (raised by
	            ``json.loads``) if the payload is not valid JSON.
	    """
	    start = text.find("(")
	    end = text.rfind(")")
	    if start == -1 or end == -1 or start > end:
	        raise ValueError("bad jsonp")
	    return json.loads(text[start + 1:end])

	def download_pdf(pdfCode, timeout=30):
	    """Download the PDF attached to an Eastmoney announcement.

	    Fetches the announcement detail response, scans its text for the first
	    ``pdf.dfcfw.com`` link that contains *pdfCode*, and downloads that file.

	    Args:
	        pdfCode: Announcement code, sent as the ``art_code`` query parameter.
	        timeout: Seconds to wait on each HTTP request before giving up.

	    Returns:
	        The raw bytes of the PDF.

	    Raises:
	        IndexError: If the announcement detail contains no matching PDF link.
	        requests.HTTPError: If the PDF request does not return HTTP 200.
	    """
	    u = f"http://np-cnotice-stock.eastmoney.com/api/content/ann?cb=jQuery1123009177686879890468_1624689075466&art_code={pdfCode}&client_source=web&page_index=1&_=1624689075467"
	    detail = requests.get(u, timeout=timeout).text
	    # ".*?" (lazy, any length) is required on both sides of the code: with
	    # ".?" the pattern allows at most one character and never matches a
	    # real link. Host dots and the code are escaped to match literally.
	    pattern = r"https://pdf\.dfcfw\.com/pdf.*?{}.*?pdf".format(re.escape(str(pdfCode)))
	    match = re.search(pattern, detail)
	    if match is None:
	        # Same exception type the bare ``findall(...)[0]`` raised, but with
	        # a message that says what was missing.
	        raise IndexError(f"no PDF link found for announcement {pdfCode!r}")
	    link = match.group(0)
	    res = requests.get(link, timeout=timeout)
	    # Explicit check instead of ``assert``, which is stripped under ``python -O``.
	    if res.status_code != 200:
	        raise requests.HTTPError(
	            f"unexpected HTTP status {res.status_code} for {link}", response=res
	        )
	    return res.content


	# Title fragments that select an announcement for download: annual report,
	# Q1 / Q3 full report, semi-annual report, earnings forecast.
	valid_report = (
	    "年年度报告",
	    "第一季度报告全文",
	    "第三季度报告全文",
	    "年半年度报告",
	    "业绩预告",
	)
	# Title fragments that exclude an announcement even when a fragment above
	# matches: "regarding", "meeting", "opinion", "summary".
	invalid_report = (
	    "关于",
	    "会议",
	    "意见",
	    "摘要",
	)
	def any_exists(words, title):
	    """Return True if at least one of *words* occurs as a substring of *title*.

	    Args:
	        words: Iterable of strings to look for.
	        title: String to search in.

	    Returns:
	        True if any word is contained in *title*, otherwise False
	        (including when *words* is empty).
	    """
	    # any() stops at the first hit instead of scanning every word.
	    return any(word in title for word in words)


	def _report_filename(title, notice_date):
	    """Build a filesystem-safe ``<title>-<date>.pdf`` name for a report."""
	    # Replace path separators and characters that are invalid on common
	    # filesystems (":" was the only one handled before).
	    safe_title = re.sub(r'[\\/:*?"<>|]', "-", title)
	    # Keep only the part of the notice date before the first space.
	    return "%s-%s.pdf" % (safe_title, notice_date.split(" ")[0])


	def download(code, max_pages=19):
	    """Download the periodic-report PDFs of a stock into the working directory.

	    Walks the announcement listing page by page, keeps entries whose title
	    matches ``valid_report`` and does not match ``invalid_report``, and
	    saves each one as ``<title>-<date>.pdf``. Stops at the first empty page.

	    Args:
	        code: Stock code, sent as the ``stock_list`` query parameter.
	        max_pages: Maximum number of listing pages (50 entries each) to scan.
	    """
	    for page in range(1, max_pages + 1):
	        u = f"http://np-anotice-stock.eastmoney.com/api/security/ann?cb=jQuery112304961501953883929_1624688055706&sr=-1&page_size=50&page_index={page}&ann_type=A&client_source=web&stock_list={code}&f_node=0&s_node=0"
	        # A timeout keeps a stalled connection from hanging the script forever.
	        res = requests.get(u, timeout=30)
	        data = jsonp_json(res.text)
	        announcements = data["data"]["list"]
	        if not announcements:
	            break
	        for item in announcements:
	            title = item["title"]
	            if not any_exists(valid_report, title) or any_exists(invalid_report, title):
	                continue
	            content = download_pdf(item["art_code"])
	            fn = _report_filename(title, item["notice_date"])
	            print(fn)
	            with open(fn, "wb") as f:
	                f.write(content)


	if __name__ == "__main__":
	    # Exit with a usage hint instead of an IndexError traceback when the
	    # stock code argument is missing.
	    if len(sys.argv) < 2:
	        sys.exit(f"usage: {sys.argv[0]} <stock code>")
	    download(sys.argv[1])