Skip to content

Instantly share code, notes, and snippets.

@fripSide
Created August 12, 2018 10:57
Show Gist options
  • Save fripSide/a78dffaf35d37f504911e8f0c7c3ac81 to your computer and use it in GitHub Desktop.
Save fripSide/a78dffaf35d37f504911e8f0c7c3ac81 to your computer and use it in GitHub Desktop.
crawler for stocks
# coding: utf-8
__author__ = 'fripSide'
import os
import requests as r
import lxml.html as html
import re
import pandas as pd
"""
TODO:
1. Scrape the stock names
2. Scrape the complete daily K-line history of every stock
3. Run simulated trading
"""
# Directory that receives every file this script writes.
BASE_DIR = "data"
# exist_ok avoids the check-then-create race of `if not exists: mkdir`.
os.makedirs(BASE_DIR, exist_ok=True)

# Page that is scraped for the "name(code)" stock entries.
STOCK_NAME_URL = "http://quote.eastmoney.com/stocklist.html"
# Daily K-line endpoint; the placeholders are filled with (year, stock id).
NETEASE_API = "http://img1.money.126.net/data/hs/kline/day/history/{}/{}.json"
def get_stock_name_list(url, timeout=30):
    """Scrape the stock list page and save every (name, code, market) row.

    Downloads *url*, decodes it as gb2312 and reads the ``<ul>`` lists found
    directly under ``div#quotesearch``.  The first list is tagged "sh" and
    the second "sz"; any further lists are ignored.  Every link whose text
    looks like ``name(code)`` yields one row.  The rows are written as
    comma-separated lines to ``stock_code.csv`` inside BASE_DIR.

    Args:
        url: Address of the stock list page.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        A list of ``[name, code, market]`` lists, market being "sh" or "sz".
    """
    response = r.get(url, timeout=timeout)
    response.encoding = "gb2312"
    tree = html.fromstring(response.text)
    market_lists = tree.cssselect("div#quotesearch > ul")
    # Raw string: same pattern as before, without invalid "\(" escapes.
    name_code = re.compile(r"([^\(]+)\((.+)\)")
    print(len(market_lists))

    companies = []
    # zip() stops at the shorter input, so an unexpected third list on the
    # page can no longer raise IndexError on the market lookup.
    for market, market_list in zip(("sh", "sz"), market_lists):
        for link in market_list.cssselect("ul > li > a"):
            found = name_code.search(link.text_content())
            if found:
                name, code = found.groups()
                companies.append([name, code, market])

    # Explicit UTF-8: the names are Chinese and pandas.read_csv decodes
    # UTF-8 by default, whereas the locale encoding may be anything.
    csv_path = os.path.join(BASE_DIR, "stock_code.csv")
    with open(csv_path, "w", encoding="utf-8") as fp:
        fp.writelines(",".join(row) + "\n" for row in companies)
    return companies
def process_stock(start_year=1990, end_year=2018):
    """Read stock_code.csv and download the yearly data of every stock.

    For each row of ``stock_code.csv`` in BASE_DIR and each year in
    ``range(start_year, end_year)``, the NETEASE_API URL is built, printed
    and handed to save_one.  Each year is stored under its own
    ``<code>_<year>`` name so that one year does not overwrite another.

    Args:
        start_year: First year to request (inclusive).
        end_year: Year at which to stop (exclusive).
    """
    # dtype=str keeps the leading zeros of codes such as "000001"; the
    # default type inference would turn them into the integer 1.
    listing = pd.read_csv(
        os.path.join(BASE_DIR, "stock_code.csv"),
        header=None,
        sep=",",
        dtype=str,
    )
    # The API expects the stock code prefixed with a digit for its market.
    market_prefix = {"sz": 1, "sh": 0}
    for _name, code, market in listing.values:
        stock = "{}{}".format(market_prefix[market], code)
        for year in range(start_year, end_year):
            url = NETEASE_API.format(year, stock)
            print(url)
            save_one("{}_{}".format(code, year), url)
def save_one(code, url, timeout=30):
    """Download *url* and store the body as ``<code>.json`` in BASE_DIR.

    Nothing is written unless the server answers with HTTP 200, so error
    pages are never saved as if they were data.  Exceptions raised by the
    request itself are not caught here.

    Args:
        code: Name (without extension) of the JSON file to write.
        url: Address to download.
        timeout: Seconds to wait for the server before requests gives up.
    """
    response = r.get(url, timeout=timeout)
    if response.status_code != 200:
        return
    response.encoding = "utf-8"
    target = os.path.join(BASE_DIR, "{}.json".format(code))
    # Write with the same encoding the body was decoded with, rather than
    # whatever the locale default happens to be.
    with open(target, "w", encoding="utf-8") as fp:
        fp.write(response.text)
def main():
    """Run the crawl: stock list first, then the history of every stock."""
    get_stock_name_list(STOCK_NAME_URL)
    process_stock()
    # Additionally fetch the 2015 data for code 399001 via a fixed URL.
    save_one("399001", "http://img1.money.126.net/data/hs/kline/day/history/2015/1399001.json")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment