Skip to content

Instantly share code, notes, and snippets.

@cwchentw
Created October 14, 2018 23:47
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cwchentw/0909695b48ed409dacd69236966790d9 to your computer and use it in GitHub Desktop.
Save cwchentw/0909695b48ed409dacd69236966790d9 to your computer and use it in GitHub Desktop.
Yahoo Finance Crawler as a Java Swing app
/*
YahooFinanceCrawler
Version: 1.0
Copyright: 2018, Michael Chen; Apache 2.0.
System Requirements:
- JDK 8
- Selenium Java package and its dependencies.
- MgntUtils
*/
package tw.cwchen.crawler;
import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.FlowLayout;
import java.awt.event.ActionEvent;
import java.io.IOException;
import java.nio.file.DirectoryNotEmptyException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import javax.swing.Box;
import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import com.mgnt.utils.TimeUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
/**
 * Small Swing application that drives a Chrome browser through Selenium to
 * download the historical-data CSV of one asset from Yahoo Finance.
 *
 * <p>{@link #main(String[])} shows a form with an asset field, a duration
 * selector and a Submit button; pressing Submit calls
 * {@link #run(String, TimeSpan, String)} on a background thread.
 */
public class YahooFinanceCrawler {

    /** Page the browser opens before the asset is searched for. */
    private static final String SITE = "https://finance.yahoo.com/";

    /** Time spans offered by the date picker on the "Historical Data" page. */
    public enum TimeSpan {
        TS_1D("1D"),
        TS_5D("5D"),
        TS_3M("3M"),
        TS_6M("6M"),
        TS_YTD("YTD"),
        TS_1Y("1Y"),
        TS_5Y("5Y"),
        TS_Max("Max");

        /** Text of the matching entry in the date-picker menu. */
        private final String label;

        TimeSpan(String label) {
            this.label = label;
        }
    }

    /**
     * Opens Yahoo Finance in Chrome, searches for {@code targetAsset}, selects
     * {@code timeSpan} on the "Historical Data" page and clicks "Download Data".
     *
     * <p>The browser is always closed before this method returns, even when a
     * step fails with an exception.
     *
     * @param targetAsset  text typed into the site's search box; also used as
     *                     the base name of the CSV file removed from
     *                     {@code downloadPath} before downloading
     * @param timeSpan     time span to pick in the date-picker menu
     * @param downloadPath directory configured as Chrome's default download
     *                     directory
     * @throws IllegalArgumentException if {@code targetAsset} is null or blank,
     *                                  or if {@code timeSpan} or
     *                                  {@code downloadPath} is null
     */
    public void run(String targetAsset, TimeSpan timeSpan, String downloadPath) {
        // Reject unusable input before a browser window is launched.
        if (targetAsset == null || targetAsset.trim().isEmpty()) {
            throw new IllegalArgumentException("targetAsset must not be empty");
        }
        if (timeSpan == null) {
            throw new IllegalArgumentException("timeSpan must not be null");
        }
        if (downloadPath == null) {
            throw new IllegalArgumentException("downloadPath must not be null");
        }

        // Set default download path for Chrome.
        ChromeOptions options = new ChromeOptions();
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("download.default_directory", downloadPath);
        options.setExperimentalOption("prefs", prefs);

        // Start a new Chrome instance.
        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get(SITE);
            // Wait for the page to load.
            pause(9, 13);

            // Send the search target to the website.
            WebElement input = driver.findElement(By.cssSelector("#fin-srch-assist input"));
            input.sendKeys(targetAsset);
            input.submit();
            // Wait for the result page to load.
            pause(6, 9);

            // Open the "Historical Data" subpage.
            clickFirstWithText(driver, "a span", "Historical Data");
            pause(1, 4);

            // Open the date-picker dialog.
            driver.findElement(By.cssSelector(".historical div div span svg")).click();
            pause(1, 4);

            // Select the duration.
            clickFirstWithText(driver, "[data-test=\"date-picker-menu\"] div span", timeSpan.label);
            pause(1, 4);

            // Press the "Done" button of the date picker.
            driver.findElement(By.cssSelector("[data-test=\"date-picker-menu\"] div button")).click();
            pause(1, 4);

            // Apply the change.
            clickFirstWithText(driver, "button span", "Apply");

            deleteStaleDownload(downloadPath, targetAsset);

            // Download the data.
            clickFirstWithText(driver, "a span", "Download Data");
            pause(1, 4);
        } finally {
            // Close the browser even when one of the steps above failed.
            driver.quit();
        }
    }

    /**
     * Sleeps for a random whole number of seconds to simulate a user idling.
     *
     * @param minSeconds          shortest pause, inclusive
     * @param maxSecondsExclusive upper bound of the pause, exclusive
     */
    private static void pause(int minSeconds, int maxSecondsExclusive) {
        TimeUtils.sleepFor(
                ThreadLocalRandom.current().nextInt(minSeconds, maxSecondsExclusive),
                TimeUnit.SECONDS);
    }

    /**
     * Clicks the first element matched by {@code cssSelector} whose text equals
     * {@code text}. A warning is printed when nothing matches.
     *
     * @return {@code true} if an element was clicked
     */
    private static boolean clickFirstWithText(WebDriver driver, String cssSelector, String text) {
        List<WebElement> candidates = driver.findElements(By.cssSelector(cssSelector));
        for (WebElement candidate : candidates) {
            if (text.equals(candidate.getText())) {
                candidate.click();
                return true;
            }
        }
        System.err.println(
                "No element matching '" + cssSelector + "' with text '" + text + "' was found.");
        return false;
    }

    /**
     * Removes {@code <targetAsset>.csv} from the download directory if present,
     * presumably so a file left by an earlier run does not collide with the new
     * download. Failure is reported but does not stop the crawl.
     */
    private static void deleteStaleDownload(String downloadPath, String targetAsset) {
        String fileName = targetAsset + ".csv";
        try {
            Files.deleteIfExists(Paths.get(downloadPath, fileName));
        } catch (IOException e) {
            System.err.println(
                    "Could not delete " + fileName + " in " + downloadPath + ": " + e);
        }
    }

    /**
     * Maps an entry of the duration combo box to its {@link TimeSpan}.
     * Unknown text falls back to {@link TimeSpan#TS_5Y}.
     */
    private static TimeSpan parseDuration(String targetDuration) {
        switch (targetDuration) {
            case "1 day (1D)":
                return TimeSpan.TS_1D;
            case "5 days (5D)":
                return TimeSpan.TS_5D;
            case "3 months (3M)":
                return TimeSpan.TS_3M;
            case "6 months (6M)":
                return TimeSpan.TS_6M;
            case "Year To Date (YTD)":
                return TimeSpan.TS_YTD;
            case "1 year (1Y)":
                return TimeSpan.TS_1Y;
            case "Maximal (Max)":
                return TimeSpan.TS_Max;
            case "5 years (5Y)":
            default:
                return TimeSpan.TS_5Y;
        }
    }

    /**
     * Launches the application window.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(YahooFinanceCrawler::createAndShowGui);
    }

    /** Builds the form and wires the Submit button to the crawler. */
    private static void createAndShowGui() {
        YahooFinanceCrawler crawler = new YahooFinanceCrawler();

        // Create a new JFrame.
        JFrame frame = new JFrame("Yahoo Finance Crawler");
        frame.setSize(320, 150);
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        // Rows are stacked in a vertical box inside the content pane.
        Container cr = frame.getContentPane();
        Box bv = Box.createVerticalBox();

        // Create the stock panel.
        JPanel stockPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
        stockPanel.add(new JLabel("Target asset: "));
        JTextField targetAssetField = new JTextField(15);
        stockPanel.add(targetAssetField);
        bv.add(stockPanel);

        // Create the duration panel.
        JPanel durationPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
        durationPanel.add(new JLabel("Target duration: "));
        String[] targetDurations = {
            "1 day (1D)",
            "5 days (5D)",
            "3 months (3M)",
            "6 months (6M)",
            "Year To Date (YTD)",
            "1 year (1Y)",
            "5 years (5Y)",
            "Maximal (Max)"
        };
        JComboBox<String> targetDurationList = new JComboBox<>(targetDurations);
        // Preselect "5 years (5Y)".
        targetDurationList.setSelectedIndex(6);
        durationPanel.add(targetDurationList);
        bv.add(durationPanel);

        // Create submitBtn.
        JButton submitBtn = new JButton("Submit");
        submitBtn.addActionListener((ActionEvent e) -> {
            String targetAsset = targetAssetField.getText().trim();
            if (targetAsset.isEmpty()) {
                // Nothing to search for; do not launch a browser.
                return;
            }
            Object selected = targetDurationList.getSelectedItem();
            TimeSpan ts = parseDuration(selected == null ? "" : selected.toString());
            String downloadPath = System.getProperty("user.home") + "/Downloads";

            // The crawl sleeps and drives a browser for a long time, so it runs
            // on its own thread to keep the event dispatch thread responsive.
            // The button is disabled meanwhile to prevent overlapping crawls.
            submitBtn.setEnabled(false);
            Thread worker = new Thread(() -> {
                try {
                    crawler.run(targetAsset, ts, downloadPath);
                } finally {
                    SwingUtilities.invokeLater(() -> submitBtn.setEnabled(true));
                }
            }, "yahoo-finance-crawler");
            worker.start();
        });
        bv.add(submitBtn);

        // Add bv (the vertical box) to cr (the content pane).
        cr.add(bv);

        // Make the frame visible.
        frame.setVisible(true);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment