Skip to content

Instantly share code, notes, and snippets.

@maliubiao
Last active October 30, 2021 11:16
Show Gist options
  • Save maliubiao/d219fb489d15a30c1e9c438b65e12c66 to your computer and use it in GitHub Desktop.
Save maliubiao/d219fb489d15a30c1e9c438b65e12c66 to your computer and use it in GitHub Desktop.
stock-report-pdf-downloader
"""
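Given a 6-digit stock code, download the full text of its recent earnings
reports so the company's development can be tracked over time.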
"""
import requests
import re
import json
import sys
import pdb
def jsonp_json(text):
    """Decode the JSON payload wrapped inside a JSONP response.

    The payload is everything between the first ``(`` and the last ``)``
    in *text*.

    Args:
        text: Raw JSONP response body, e.g. ``callback({...})``.

    Returns:
        The Python object produced by ``json.loads`` on the payload.

    Raises:
        ValueError: If the parentheses are missing or out of order
            (message ``"bad jsonp"``), or if the payload is not valid JSON.
    """
    open_idx = text.find("(")
    close_idx = text.rfind(")")
    # A single chained comparison rejects a missing "(" (-1), a missing ")"
    # (-1) and a ")" that appears before the "(".
    if not (0 <= open_idx <= close_idx):
        raise ValueError("bad jsonp")
    payload = text[open_idx + 1:close_idx]
    return json.loads(payload)
def download_pdf(pdfCode):
    """Download the PDF attached to the announcement *pdfCode*.

    The announcement-content endpoint is queried first; the first
    ``https://pdf.dfcfw.com/pdf...pdf`` link in its response text that
    contains *pdfCode* is then fetched.

    Args:
        pdfCode: The announcement's ``art_code`` as found in the listing API
            response.

    Returns:
        The raw bytes of the PDF response body.

    Raises:
        IndexError: If the response contains no PDF link for *pdfCode*
            (same exception type the bare ``[0]`` lookup used to raise).
        RuntimeError: If the PDF request does not return HTTP 200.
    """
    u = f"http://np-cnotice-stock.eastmoney.com/api/content/ann?cb=jQuery1123009177686879890468_1624689075466&art_code={pdfCode}&client_source=web&page_index=1&_=1624689075467"
    # A timeout keeps a stalled connection from hanging the script forever.
    t = requests.get(u, timeout=30).text
    # Escape the host dots and the code so they are matched literally rather
    # than as regex metacharacters.
    pattern = r"https://pdf\.dfcfw\.com/pdf.*?{}.*?pdf".format(re.escape(str(pdfCode)))
    match = re.search(pattern, t)
    if match is None:
        raise IndexError(f"no PDF link found for announcement {pdfCode!r}")
    link = match.group(0)
    res = requests.get(link, timeout=30)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if res.status_code != 200:
        raise RuntimeError(f"downloading {link} failed with HTTP status {res.status_code}")
    return res.content
# Title keywords that mark an announcement as a report worth downloading.
valid_report = (
    "年年度报告",        # annual report
    "第一季度报告全文",  # first-quarter report, full text
    "第三季度报告全文",  # third-quarter report, full text
    "年半年度报告",      # semi-annual report
    "业绩预告",          # earnings forecast
)
# Title keywords that disqualify an announcement even if a keyword above matches.
invalid_report = (
    "关于",  # "regarding ..." notices
    "会议",  # meeting
    "意见",  # opinion
    "摘要",  # summary / abstract
)
def any_exists(words, title):
    """Return True if at least one entry of *words* occurs in *title*.

    Args:
        words: Iterable of substrings to look for.
        title: The string to search in.

    Returns:
        ``True`` when some word is a substring of *title*, ``False``
        otherwise (including when *words* is empty).
    """
    # `any` stops at the first hit instead of scanning every keyword.
    return any(word in title for word in words)
def download(code, max_pages=19):
    """Download every matching report PDF for stock *code* into the current directory.

    Walks the announcement listing page by page (50 entries per page),
    stopping at the first empty page or after *max_pages* pages. An
    announcement is downloaded when its title contains a keyword from
    ``valid_report`` and none from ``invalid_report``. Each PDF is saved as
    ``<title>-<notice date>.pdf`` and the file name is printed.

    Args:
        code: Stock code to pass to the listing API as ``stock_list``.
        max_pages: Maximum number of listing pages to scan. Defaults to 19,
            the bound that was previously hard-coded.
    """
    for page in range(1, max_pages + 1):
        u = f"http://np-anotice-stock.eastmoney.com/api/security/ann?cb=jQuery112304961501953883929_1624688055706&sr=-1&page_size=50&page_index={page}&ann_type=A&client_source=web&stock_list={code}&f_node=0&s_node=0"
        # A timeout keeps a stalled connection from hanging the script forever.
        res = requests.get(u, timeout=30)
        data = jsonp_json(res.text)
        announcements = data["data"]["list"]
        if not announcements:
            # An empty (or missing) list means there are no further pages.
            break
        for item in announcements:
            title = item["title"]
            if not any_exists(valid_report, title) or any_exists(invalid_report, title):
                continue
            content = download_pdf(item["art_code"])
            # Only the part of notice_date before the first space is used.
            notice_day = item["notice_date"].split(" ")[0]
            # Path separators in a title would make the name point into
            # another directory (or fail to open), so replace them as well
            # as ":".
            safe_title = title.replace(":", "-").replace("/", "-").replace("\\", "-")
            fn = f"{safe_title}-{notice_day}.pdf"
            print(fn)
            # Write-only binary mode is sufficient; the file is never read back.
            with open(fn, "wb") as f:
                f.write(content)
if __name__ == "__main__":
    # The stock code is a required positional argument; exit with a usage
    # message (written to stderr, exit status 1) instead of an IndexError
    # traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <6-digit stock code>")
    download(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment