Skip to content

Instantly share code, notes, and snippets.

@CMingTseng
Forked from virus-warnning/ticks_by_location.py
Created August 17, 2019 14:00
Show Gist options
  • Save CMingTseng/79447ccb2bb41e4bd8ec36d020fccab9 to your computer and use it in GitHub Desktop.
Save CMingTseng/79447ccb2bb41e4bd8ec36d020fccab9 to your computer and use it in GitHub Desktop.
分點進出取資料研究
import json
import os
import re
import subprocess
import sys

import requests
from bs4 import BeautifulSoup
session = requests.Session()
resp = session.get('https://bsr.twse.com.tw/bshtm/bsMenu.aspx')
if resp.status_code == 200:
soup = BeautifulSoup(resp.text, 'lxml')
nodes = soup.select('form input')
params = {}
for node in nodes:
name = node.attrs['name']
# 忽略鉅額交易的 radio button
if name in ('RadioButton_Excd', 'Button_Reset'):
continue
if 'value' in node.attrs:
params[node.attrs['name']] = node.attrs['value']
else:
params[node.attrs['name']] = ''
# 找 captcha 圖片
captcha_image = soup.select('#Panel_bshtm img')[0]['src']
m = re.search(r'guid=(.+)', captcha_image)
if m is None:
exit(1)
# 顯示 captcha 圖片
imgpath = '%s.jpg' % m.group(1)
url = 'https://bsr.twse.com.tw/bshtm/' + captcha_image
resp = requests.get(url)
if resp.status_code == 200:
with open(imgpath, 'wb') as f:
f.write(resp.content)
os.system('open ' + imgpath)
# 詢問 captcha 圖片的文字
# https://blog.steven5538.tw/2014/06/22/captcha-ocr-preprocess-python/
# http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
print('輸入圖型驗證碼: ', end='', flush=True)
vcode = sys.stdin.readline().strip()
params['CaptchaControl1'] = vcode
params['TextBox_Stkno'] = '2330'
# 送出
print(json.dumps(params, indent=2))
resp = session.post('https://bsr.twse.com.tw/bshtm/bsMenu.aspx', data=params)
if resp.status_code != 200:
print('任務失敗: %d' % resp.status_code)
exit(1)
soup = BeautifulSoup(resp.text, 'lxml')
nodes = soup.select('#HyperLink_DownloadCSV')
if len(nodes) == 0:
print('任務失敗,沒有下載連結')
exit(1)
# 下載分點進出 CSV
resp = session.get('https://bsr.twse.com.tw/bshtm/bsContent.aspx')
if resp.status_code != 200:
print('任務失敗,無法下載分點進出 CSV')
exit(1)
print(resp.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment