Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
TAIEX web crawler
#!/usr/bin/env python
##############################################################################
# fetchTAIEX.py #
# #
# Requirements #
# #
# - Python 3 #
# - Selenium package for Python #
# - The web driver for Chrome #
# #
# 2018, Michael Chen; Apache 2.0 #
##############################################################################
import csv
import datetime
import os
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Durations accepted as the first command-line argument.
validDurations = ['YTD', '1Y', '3Y', '5Y', '10Y', 'Max']
duration = 'YTD'
if len(sys.argv) < 2:
    sys.stderr.write("No valid duration. Default to YTD.\n")
else:
    duration = sys.argv[1]
if duration not in validDurations:
    sys.stderr.write("Invalid duration: %s\n\n" % duration)
    sys.stderr.write("Valid durations:\n")
    for d in validDurations:
        sys.stderr.write("\t%s\n" % d)
    # Stop here: continuing with an unknown duration would leave the start
    # year unset and fail later, after the browser has already been opened.
    sys.exit(1)
# Work out the first (year, month) to fetch from the requested duration.
now = datetime.datetime.now()
# Rolling durations: how many years to go back, keeping the current month.
yearsBack = {'1Y': 1, '3Y': 3, '5Y': 5, '10Y': 10}
year, month = None, now.month
if duration in yearsBack:
    year = now.year - yearsBack[duration]
elif duration == 'YTD':
    # Year to date starts in January of the current year.
    year, month = now.year, 1
elif duration == 'Max':
    # NOTE(review): 88 + 1911 looks like ROC year 88 converted to the
    # Gregorian calendar (1999) -- presumably the earliest year the site
    # offers; confirm against the page's year selector.
    year, month = 88 + 1911, 1
# Create a new instance of the Chrome driver.
driver = webdriver.Chrome()
# Go to the TAIEX page.
driver.get("http://www.twse.com.tw/zh/page/trading/indices/MI_5MINS_HIST.html")
# Wait for the page to finish loading.
time.sleep(10)
# The find_element_by_* helpers were removed in Selenium 4.3;
# find_element(By..., ...) is available in both Selenium 3 and 4.
# NOTE(review): ".main a" is presumably the page's query button -- confirm.
queryBtn = driver.find_element(By.CSS_SELECTOR, ".main a")
# Give the page a little more time to settle before interacting with it.
time.sleep(3)
def monToStr(m):
    """Return month number *m* as text, prefixed with '0' when m is below 10."""
    digits = str(m)
    return '0' + digits if m < 10 else digits
sys.stderr.write("Fetch data from the website...\n")
# "YYYYMM" stamps for the first and last month fetched.
pastDateStr = "%d%s" % (year, monToStr(month))
currDateStr = "%d%s" % (now.year, monToStr(now.month))


def selectOption(selectName, value):
    """Click the <option> of the named <select> whose value equals *value*.

    Pauses two seconds after a successful click so the page can react.
    Returns True when a matching option was clicked, False when none matched.
    """
    options = driver.find_elements(
        By.CSS_SELECTOR, 'select[name="%s"] option' % selectName)
    wanted = str(value)
    for option in options:
        if option.get_attribute("value") == wanted:
            option.click()
            time.sleep(2)
            return True
    return False


def fetchDisplayedRows():
    """Click the query button, wait, and return the report table's rows.

    Each row is returned as a list holding the text of its <td> cells.
    """
    queryBtn.click()
    time.sleep(3)
    rows = driver.find_elements(
        By.CSS_SELECTOR, "#report-table_wrapper tbody tr")
    return [[td.text for td in row.find_elements(By.CSS_SELECTOR, "td")]
            for row in rows]


data = []
currYear = year
currMonth = month
# Select the initial year.
selectOption("yy", currYear)
# Walk month by month from (year, month) up to and including the current
# month, collecting the table rows of every month.
while (currYear, currMonth) <= (now.year, now.month):
    # A month with no matching option is skipped without querying.
    if selectOption("mm", currMonth):
        data.extend(fetchDisplayedRows())
    currMonth += 1
    if currMonth > 12 and currYear < now.year:
        # Roll over to January of the next year and update the year selector.
        currMonth = 1
        currYear += 1
        selectOption("yy", currYear)
sys.stderr.write("Write data to csv file...\n")
# The output file name carries the first and last month stamps.
outputName = "TAIEX_%sto%s.csv" % (pastDateStr, currDateStr)
with open(outputName, 'w', newline='') as outFile:
    writer = csv.writer(outFile)
    # Header row first, then every collected table row as-is.
    writer.writerow(["Date", "Open", "High", "Low", "Close"])
    writer.writerows(data)
# Close the browser.
driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.