Created
October 14, 2018 23:47
-
-
Save cwchentw/0909695b48ed409dacd69236966790d9 to your computer and use it in GitHub Desktop.
Yahoo Finance Crawler as a Java Swing app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
YahooFinanceCrawler
Version: 1.0
Copyright: 2018, Michael Chen; Apache 2.0.
System Requirements:
- JDK 8
- Selenium Java package and its dependencies.
- MgntUtils
*/
package tw.cwchen.crawler; | |
import java.util.concurrent.ThreadLocalRandom; | |
import java.util.concurrent.TimeUnit; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.HashMap; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.nio.file.DirectoryNotEmptyException; | |
import java.nio.file.NoSuchFileException; | |
import javax.swing.JFrame; | |
import javax.swing.Box; | |
import javax.swing.JPanel; | |
import javax.swing.JLabel; | |
import javax.swing.JButton; | |
import javax.swing.JTextField; | |
import javax.swing.JComboBox; | |
import java.awt.Container; | |
import java.awt.BorderLayout; | |
import java.awt.FlowLayout; | |
import java.awt.event.ActionEvent; | |
import com.mgnt.utils.TimeUtils; | |
import org.openqa.selenium.By; | |
import org.openqa.selenium.chrome.ChromeOptions; | |
import org.openqa.selenium.WebDriver; | |
import org.openqa.selenium.WebElement; | |
import org.openqa.selenium.chrome.ChromeDriver; | |
public class YahooFinanceCrawler { | |
private final String site = "https://finance.yahoo.com/"; | |
public static enum TimeSpan { | |
TS_1D, | |
TS_5D, | |
TS_3M, | |
TS_6M, | |
TS_YTD, | |
TS_1Y, | |
TS_5Y, | |
TS_Max, | |
}; | |
public void run(String targetAsset, TimeSpan timeSpan, String downloadPath) { | |
// Set default download path for Chrome. | |
ChromeOptions options = new ChromeOptions(); | |
Map<String, Object> prefs = new HashMap<>(); | |
prefs.put("download.default_directory", downloadPath); | |
options.setExperimentalOption("prefs", prefs); | |
// Start a new Chrome instance. | |
WebDriver driver = new ChromeDriver(options); | |
driver.get(site); | |
// Wait the page to refresh. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(9, 13), TimeUnit.SECONDS); | |
// Send search target to the website. | |
WebElement input = driver.findElement(By.cssSelector("#fin-srch-assist input")); | |
input.sendKeys(targetAsset); | |
input.submit(); | |
// Wait the page to refresh. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(6, 9), TimeUnit.SECONDS); | |
// Click on "Historical Data" subpage. | |
List<WebElement> subpages = driver.findElements(By.cssSelector("a span")); | |
for (WebElement subpage : subpages) { | |
if (subpage.getText().equals("Historical Data")) { | |
subpage.click(); | |
break; | |
} | |
} | |
// Simulate idling. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(1, 4), TimeUnit.SECONDS); | |
// Select the dialog. | |
WebElement arrow = driver.findElement(By.cssSelector(".historical div div span svg")); | |
arrow.click(); | |
// Simulate idling. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(1, 4), TimeUnit.SECONDS); | |
// Select the duration. | |
List<WebElement> durations = driver.findElements(By.cssSelector("[data-test=\"date-picker-menu\"] div span")); | |
for (WebElement duration : durations) { | |
if (duration.getText().equals(timeSpanToString(timeSpan))) { | |
duration.click(); | |
break; | |
} | |
} | |
// Simulate idling. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(1, 4), TimeUnit.SECONDS); | |
// Select "Done" button. | |
WebElement done = driver.findElement(By.cssSelector("[data-test=\"date-picker-menu\"] div button")); | |
done.click(); | |
// Simulate idling. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(1, 4), TimeUnit.SECONDS); | |
// Apply the change. | |
List<WebElement> buttons = driver.findElements(By.cssSelector("button span")); | |
for (WebElement button : buttons) { | |
if (button.getText().equals("Apply")) { | |
button.click(); | |
break; | |
} | |
} | |
try | |
{ | |
Files.deleteIfExists(Paths.get(downloadPath, targetAsset + ".csv")); | |
} | |
catch(NoSuchFileException e) | |
{ | |
System.out.println("No such file/directory exists"); | |
} | |
catch(DirectoryNotEmptyException e) | |
{ | |
System.out.println("Directory is not empty."); | |
} | |
catch(IOException e) | |
{ | |
System.out.println("Invalid permissions."); | |
} | |
// Download the data. | |
List<WebElement> links = driver.findElements(By.cssSelector("a span")); | |
for (WebElement link : links) { | |
if (link.getText().equals("Download Data")) { | |
link.click(); | |
break; | |
} | |
} | |
// Simulate idling. | |
TimeUtils.sleepFor(ThreadLocalRandom.current().nextInt(1, 4), TimeUnit.SECONDS); | |
// Close the browser. | |
driver.quit(); | |
} | |
private String timeSpanToString(TimeSpan ts) { | |
switch (ts) { | |
case TS_1D: | |
return "1D"; | |
case TS_5D: | |
return "5D"; | |
case TS_3M: | |
return "3M"; | |
case TS_6M: | |
return "6M"; | |
case TS_YTD: | |
return "YTD"; | |
case TS_1Y: | |
return "1Y"; | |
case TS_5Y: | |
return "5Y"; | |
case TS_Max: | |
return "Max"; | |
} | |
return ""; | |
} | |
public static void main(String[] args) { | |
YahooFinanceCrawler crawler = new YahooFinanceCrawler(); | |
// Create a new JFrame. | |
JFrame frame = new JFrame("Yahoo Finance Crawler"); | |
frame.setSize(320, 150); | |
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); | |
// Set the layout of the frame. | |
Container cr = frame.getContentPane(); | |
Box bv = Box.createVerticalBox(); | |
// Create the stock panel. | |
JPanel stockPanel = new JPanel(new FlowLayout(FlowLayout.LEFT)); | |
stockPanel.add(new JLabel("Target asset: ")); | |
JTextField targetAssetField = new JTextField(15); | |
stockPanel.add(targetAssetField); | |
// Add the stock panel into the container. | |
bv.add(BorderLayout.WEST, stockPanel); | |
// Create the duration panel. | |
JPanel durationPanel = new JPanel(new FlowLayout(FlowLayout.LEFT)); | |
durationPanel.add(new JLabel("Target duration: ")); | |
String[] targetDurations = { | |
"1 day (1D)", | |
"5 days (5D)", | |
"3 months (3M)", | |
"6 months (6M)", | |
"Year To Date (YTD)", | |
"1 year (1Y)", | |
"5 years (5Y)", | |
"Maximal (Max)" | |
}; | |
JComboBox targetDurationList = new JComboBox(targetDurations); | |
targetDurationList.setSelectedIndex(6); | |
durationPanel.add(targetDurationList); | |
bv.add(BorderLayout.WEST, durationPanel); | |
// Create submitBtn. | |
JButton submitBtn = new JButton("Submit"); | |
// Add the event listener for submitBtn. | |
submitBtn.addActionListener((ActionEvent e) -> { | |
String targetAsset = targetAssetField.getText(); | |
String targetDuration = targetDurationList.getSelectedItem().toString(); | |
TimeSpan ts = TimeSpan.TS_5Y; | |
switch (targetDuration) { | |
case "1 day (1D)": | |
ts = TimeSpan.TS_1D; | |
break; | |
case "5 days (5D)": | |
ts = TimeSpan.TS_5D; | |
break; | |
case "3 months (3M)": | |
ts = TimeSpan.TS_3M; | |
break; | |
case "6 months (6M)": | |
ts = TimeSpan.TS_6M; | |
break; | |
case "Year To Date (YTD)": | |
ts = TimeSpan.TS_YTD; | |
break; | |
case "1 year (1Y)": | |
ts = TimeSpan.TS_1Y; | |
break; | |
case "5 years (5Y)": | |
ts = TimeSpan.TS_5Y; | |
break; | |
case "Maximal (Max)": | |
ts = TimeSpan.TS_Max; | |
break; | |
} | |
crawler.run(targetAsset, ts, System.getProperty("user.home") + "/Downloads"); | |
}); | |
bv.add(BorderLayout.EAST, submitBtn); | |
// Add bv (the vertical box) to cr (the content pane). | |
cr.add(bv); | |
// Make the frame visible. | |
frame.setVisible(true); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment