Created
October 14, 2018 23:52
-
-
Save cwchentw/34a7d3f97ab69716f16bb545d589dafd to your computer and use it in GitHub Desktop.
Yahoo Finance Crawler as a Python script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
##############################################################################
# fetchStockData.py - a friendly Yahoo Finance crawler.                      #
#                                                                            #
# Requirements                                                               #
#                                                                            #
# - Python 3                                                                 #
# - Selenium package for Python                                              #
# - The web driver for Chrome                                                #
#                                                                            #
# 2018, Michael Chen; Apache 2.0                                             #
##############################################################################
import os
import sys
import time
from random import randint

from selenium import webdriver
from selenium.webdriver.common.by import By
# Parameters
timeSpan = "5Y"  # Label of the range entry to pick in the date picker.
downloadPath = os.path.dirname(os.path.abspath(__file__))  # Save next to this script.


def warn(message):
    """Write a one-line warning to stderr without interrupting the crawl."""
    sys.stderr.write("Warning: %s\n" % message)


def idle():
    """Sleep for one to three seconds to simulate a human idling."""
    time.sleep(randint(1, 3))


def click_first_with_text(elements, text):
    """Click the first element whose visible text equals *text*.

    Args:
        elements: Iterable of objects exposing a ``text`` attribute and a
            ``click()`` method (e.g. Selenium web elements).
        text: Exact text to look for.

    Returns:
        True if an element matched and was clicked, False otherwise.
    """
    for element in elements:
        if element.text == text:
            element.click()
            return True
    return False


def click_labelled(driver, selector, text):
    """Click the first element matching *selector* whose text equals *text*.

    A missing element is reported on stderr instead of being ignored
    silently, so a change in the page layout is visible to the user.

    Returns:
        True if an element was clicked, False otherwise.
    """
    elements = driver.find_elements(By.CSS_SELECTOR, selector)
    clicked = click_first_with_text(elements, text)
    if not clicked:
        warn('no "%s" element found for selector "%s"' % (text, selector))
    return clicked


def fetch_history(driver, asset, span, directory):
    """Drive the browser through Yahoo Finance and download the price CSV.

    The flow uses fixed sleeps rather than explicit waits, and the CSS
    selectors reflect the page layout this script was written against.

    Args:
        driver: A Selenium WebDriver instance.
        asset: Text typed into the Yahoo Finance search box.
        span: Label of the time span to select in the date picker.
        directory: Directory in which a stale ``<asset>.csv`` is removed
            before the download starts.

    Returns:
        True if the "Download Data" link was found and clicked.
    """
    # Go to the Yahoo Finance page and wait for it to load.
    driver.get("https://finance.yahoo.com/")
    time.sleep(10)

    # Search for the asset.
    search_box = driver.find_element(By.CSS_SELECTOR, "#fin-srch-assist input")
    search_box.send_keys(asset)
    search_box.submit()
    time.sleep(10)  # Wait for the result page to load.

    # Select the subpage of Historical Data.
    click_labelled(driver, "a span", "Historical Data")
    idle()

    # Help Yahoo to earn some money. The ad is incidental to the crawl, so
    # its absence must not abort the run.
    ads = driver.find_elements(By.CSS_SELECTOR, "#sb_rel_defaultdestLDRB")
    if ads:
        ads[0].click()
    idle()

    # Open the date-picker dialog.
    arrows = driver.find_elements(By.CSS_SELECTOR, ".historical div div span svg")
    if arrows:
        arrows[0].click()
    else:
        warn("date picker arrow not found")
    idle()

    # Select the time span, confirm the dialog, then apply the change.
    click_labelled(driver, '[data-test="date-picker-menu"] div span', span)
    idle()
    click_labelled(driver, '[data-test="date-picker-menu"] div button', "Done")
    idle()
    click_labelled(driver, "button span", "Apply")
    idle()

    # Remove the old file (EAFP: avoids the exists-then-remove race).
    csv_path = os.path.join(directory, "%s.csv" % asset)
    try:
        os.remove(csv_path)
    except FileNotFoundError:
        pass

    # Download the data.
    downloaded = click_labelled(driver, "a span", "Download Data")
    if downloaded:
        time.sleep(5)  # Give the browser time to finish the download.
    return downloaded


def main(argv=None):
    """Run the crawler for the asset named on the command line.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            target asset.

    Returns:
        Process exit status: 0 when the download link was clicked, 1 when no
        asset was given or the download could not be started.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("No valid target asset\n")
        return 1
    target_asset = argv[1]

    # Change default downloading path.
    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs", {"download.default_directory": downloadPath}
    )

    # Create a new instance of the Chrome driver. `options=` replaces the
    # deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        downloaded = fetch_history(driver, target_asset, timeSpan, downloadPath)
    finally:
        # Always close the browser, even when a step above raised.
        driver.quit()
    return 0 if downloaded else 1


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment