Skip to content

Instantly share code, notes, and snippets.

@cwchentw
Last active October 27, 2019 20:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cwchentw/6f2d69424451eae52c59325f662cff82 to your computer and use it in GitHub Desktop.
Save cwchentw/6f2d69424451eae52c59325f662cff82 to your computer and use it in GitHub Desktop.
Yahoo Finance Crawler in Puppeteer, Async Version
/* Author: Michael Chen; License: MIT */
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer');
/**
 * Pause helper for async code.
 *
 * @param {number} ms - Number of milliseconds to wait before resolving.
 * @returns {Promise<undefined>} Promise fulfilled (with no value) once the
 *   timer fires.
 */
const delay = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
/**
 * Clicks the first element matching `selector` whose text contains `label`.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector for the candidate elements.
 * @param {string} label - Substring the element's text must contain.
 * @param {string} textProp - DOM property to read the text from
 *   ('textContent' or 'innerText').
 * @returns {Promise<boolean>} true when an element was clicked, false when
 *   no candidate contained `label`.
 */
async function clickFirstWithText(page, selector, label, textProp) {
  const handles = await page.$$(selector);
  for (const handle of handles) {
    const text = await page.evaluate(
      function (elem, prop) {
        return String(elem[prop] || '');
      },
      handle,
      textProp
    );
    if (text.includes(label)) {
      await handle.click();
      return true;
    }
  }
  return false;
}

/**
 * Same as clickFirstWithText, but a missing element is an error.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector for the candidate elements.
 * @param {string} label - Substring the element's text must contain.
 * @param {string} textProp - DOM property to read the text from.
 * @returns {Promise<void>}
 * @throws {Error} When no element matching `selector` contains `label`.
 */
async function clickRequired(page, selector, label, textProp) {
  const clicked = await clickFirstWithText(page, selector, label, textProp);
  if (!clicked) {
    throw new Error(`No "${label}" element found for selector: ${selector}`);
  }
}

/**
 * Waits until a 'rename' event for `fileName` is reported in `dir`.
 *
 * @param {string} dir - Directory to watch.
 * @param {string} fileName - Base name of the expected file.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves when the event is seen; rejects with
 *   Error('No file') when the timeout elapses first.
 */
function waitForFile(dir, fileName, timeoutMs) {
  return new Promise(function (resolve, reject) {
    let timer = null;
    const watcher = fs.watch(dir, function (eventType, changedName) {
      if (eventType === 'rename' && changedName === fileName) {
        clearTimeout(timer);
        watcher.close();
        resolve();
      }
    });
    timer = setTimeout(function () {
      watcher.close();
      // Reject rather than throw: an exception thrown inside a timer
      // callback cannot settle this promise, so the awaiting caller would
      // never learn about the timeout.
      reject(new Error('No file'));
    }, timeoutMs);
  });
}

/**
 * Entry point: searches Yahoo Finance for the asset given as the first
 * command-line argument, opens its "Historical Data" view, selects the 5Y
 * range and downloads the CSV into this script's directory.
 *
 * Any failure is logged and reported through a non-zero exit code.
 */
(async function () {
  const args = process.argv;
  if (args.length < 3) {
    throw new Error('No valid asset');
  }
  const asset = args[2];
  const downloadDir = path.dirname(__filename);

  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();

    // NOTE(review): `_client` is an underscore-prefixed (non-public) member of
    // the page object; confirm it exists in the installed Puppeteer version.
    await page._client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: downloadDir
    });

    await page.goto('https://finance.yahoo.com/');

    const input = await page.$('#fin-srch-assist input');
    if (!input) {
      throw new Error('Search box not found: #fin-srch-assist input');
    }
    await input.type(asset, { delay: 100 });

    // NOTE(review): each waitForNavigation() below is started only after the
    // triggering action has completed, exactly as before. Puppeteer's docs
    // recommend starting the wait together with the action (Promise.all) to
    // avoid missing a fast navigation; the ordering is kept here because the
    // change cannot be validated without the live site.
    await input.press('Enter');
    await page.waitForNavigation();

    await clickRequired(page, 'a span', 'Historical Data', 'textContent');
    await page.waitForNavigation();

    const arrow = await page.$('.historical div div span svg');
    if (!arrow) {
      throw new Error('Date range arrow not found: .historical div div span svg');
    }
    await arrow.click();
    await page.waitForNavigation();

    // The range choice and its confirmation stay best-effort: when either is
    // absent the flow simply continues after the pause.
    await clickFirstWithText(
      page,
      '[data-test="date-picker-menu"] div span',
      '5Y',
      'innerText'
    );
    await delay(3000);
    await clickFirstWithText(
      page,
      '[data-test="date-picker-menu"] div button',
      'Done',
      'innerText'
    );
    await delay(3000);

    await clickRequired(page, 'button span', 'Apply', 'innerText');
    await page.waitForNavigation();

    await clickRequired(page, 'a span', 'Download Data', 'innerText');
    await waitForFile(downloadDir, `${asset}.csv`, 30000);
  } finally {
    // Always release the browser, including when a step above fails.
    await browser.close();
  }
})().catch(function (err) {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment