Skip to content

Instantly share code, notes, and snippets.

@cwchentw
Last active April 29, 2023 04:00
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cwchentw/f7a8834aa88fa8c1d73b87356aa7b825 to your computer and use it in GitHub Desktop.
Save cwchentw/f7a8834aa88fa8c1d73b87356aa7b825 to your computer and use it in GitHub Desktop.
Yahoo Finance Crawler in Puppeteer, Promise Version
/* Author: Michael Chen; License: MIT */
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer');
/**
 * Pause a promise chain for a fixed amount of time.
 *
 * @param {number} ms - Number of milliseconds to wait.
 * @returns {Promise<undefined>} Promise fulfilled (with no value) once the
 *     timer fires.
 */
const delay = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
/**
 * Read the asset symbol (e.g. a ticker) from the command line.
 *
 * @returns {string} The first user-supplied command-line argument.
 * @throws {Error} 'No valid asset' when no argument was given.
 */
function readAsset() {
  const args = process.argv;
  if (args.length < 3 || args[2] === '') {
    throw new Error('No valid asset');
  }
  return args[2];
}

/**
 * Find the first element matching `selector` whose innerText contains `text`.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector of the candidate elements.
 * @param {string} text - Substring to look for in each element's innerText.
 * @returns {Promise<object|null>} The matching element handle, or null.
 */
function findByText(page, selector, text) {
  return page.$$(selector)
    .then(function (elements) {
      const lookups = elements.map(function (element) {
        return page.evaluate(function (elem) {
          return elem.innerText;
        }, element);
      });
      return Promise.all(lookups)
        .then(function (texts) {
          const index = texts.findIndex(function (candidate) {
            // innerText is undefined on non-HTML elements such as SVG nodes.
            return typeof candidate === 'string' && candidate.includes(text);
          });
          return index === -1 ? null : elements[index];
        });
    });
}

/**
 * Like findByText, but rejects instead of resolving to null.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector of the candidate elements.
 * @param {string} text - Substring to look for in each element's innerText.
 * @returns {Promise<object>} The matching element handle.
 */
function requireByText(page, selector, text) {
  return findByText(page, selector, text)
    .then(function (element) {
      if (!element) {
        throw new Error(`No "${selector}" element containing "${text}"`);
      }
      return element;
    });
}

/**
 * Best-effort click: click the first element containing `text`, if any.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector of the candidate elements.
 * @param {string} text - Substring to look for in each element's innerText.
 * @returns {Promise<boolean>} true when an element was clicked, else false.
 */
function clickByText(page, selector, text) {
  return findByText(page, selector, text)
    .then(function (element) {
      if (!element) {
        return false;
      }
      return element.click().then(function () {
        return true;
      });
    });
}

/**
 * Run an action that triggers a page navigation and wait for that navigation.
 * The navigation listener is armed before the action runs so that a fast
 * navigation cannot slip by unnoticed.
 *
 * @param {object} page - Puppeteer page that will navigate.
 * @param {function(): Promise} action - Starts the navigation (e.g. a click).
 * @returns {Promise} Fulfilled once both the action and navigation are done.
 */
function navigateBy(page, action) {
  return Promise.all([page.waitForNavigation(), action()]);
}

/**
 * Wait until a file with the given name appears in a directory.
 *
 * @param {string} dir - Directory to watch.
 * @param {string} expectedName - File name to wait for (case-insensitive,
 *     because the site names the file after the symbol, not after what the
 *     user typed).
 * @param {number} timeoutMs - Give up after this many milliseconds.
 * @returns {Promise<undefined>} Fulfilled when the file shows up; rejected
 *     with Error('No file') on timeout or with the watcher's own error.
 */
function waitForDownload(dir, expectedName, timeoutMs) {
  return new Promise(function (resolve, reject) {
    const wanted = expectedName.toLowerCase();
    let timer = null;
    const watcher = fs.watch(dir, function (eventType, filename) {
      // filename may be null on platforms that do not report it.
      if (eventType === 'rename' &&
          typeof filename === 'string' &&
          filename.toLowerCase() === wanted) {
        clearTimeout(timer);
        watcher.close();
        resolve();
      }
    });
    watcher.on('error', function (err) {
      clearTimeout(timer);
      watcher.close();
      reject(err);
    });
    timer = setTimeout(function () {
      watcher.close();
      // Reject rather than throw: a throw inside a timer callback escapes the
      // promise and would crash the process instead of reaching .catch().
      reject(new Error('No file'));
    }, timeoutMs);
  });
}

/**
 * Drive an already opened page through Yahoo Finance: search for the asset,
 * open "Historical Data", pick the 5Y range, apply it and download the CSV.
 *
 * @param {object} page - Fresh Puppeteer page.
 * @param {string} asset - Symbol typed into the search box.
 * @param {string} downloadDir - Directory the browser should save the CSV to.
 * @returns {Promise} Fulfilled once `<asset>.csv` has appeared in downloadDir.
 */
function downloadHistory(page, asset, downloadDir) {
  // Use a public CDP session instead of the private page._client field.
  return page.target().createCDPSession()
    .then(function (client) {
      return client.send('Page.setDownloadBehavior', {
        behavior: 'allow',
        downloadPath: downloadDir
      });
    })
    .then(function () {
      return page.goto('https://finance.yahoo.com');
    })
    .then(function () {
      return page.$('#fin-srch-assist input');
    })
    .then(function (input) {
      if (!input) {
        throw new Error('Search box not found');
      }
      return input.type(asset, { delay: 100 })
        .then(function () {
          return navigateBy(page, function () {
            return input.press('Enter');
          });
        });
    })
    .then(function () {
      return navigateBy(page, function () {
        return requireByText(page, 'a span', 'Historical Data')
          .then(function (link) {
            return link.click();
          });
      });
    })
    .then(function () {
      return page.$('.historical div div span svg');
    })
    .then(function (arrow) {
      if (!arrow) {
        throw new Error('Date range selector not found');
      }
      return navigateBy(page, function () {
        return arrow.click();
      });
    })
    .then(function () {
      // Best-effort, followed by a fixed pause while the menu settles.
      return clickByText(page, '[data-test="date-picker-menu"] div span', '5Y');
    })
    .then(function () {
      return delay(3000);
    })
    .then(function () {
      return clickByText(page, '[data-test="date-picker-menu"] div button', 'Done');
    })
    .then(function () {
      return delay(3000);
    })
    .then(function () {
      return navigateBy(page, function () {
        return requireByText(page, 'button span', 'Apply')
          .then(function (button) {
            return button.click();
          });
      });
    })
    .then(function () {
      return requireByText(page, 'a span', 'Download Data');
    })
    .then(function (link) {
      // Start watching before clicking so the file event cannot be missed.
      return Promise.all([
        waitForDownload(downloadDir, `${asset}.csv`, 30000),
        link.click()
      ]);
    });
}

/**
 * Launch a browser, download the asset's historical data next to this script
 * and always close the browser again, whether the crawl succeeded or not.
 *
 * @param {string} asset - Symbol to download.
 * @returns {Promise} Fulfilled after the download finished and the browser
 *     closed; rejected with the first error of the crawl otherwise.
 */
function crawl(asset) {
  const downloadDir = path.dirname(__filename);
  let browser = null;

  function closeBrowser() {
    return browser ? browser.close() : Promise.resolve();
  }

  return puppeteer.launch({ headless: false })
    .then(function (launched) {
      browser = launched;
      return browser.newPage();
    })
    .then(function (page) {
      return downloadHistory(page, asset, downloadDir);
    })
    .then(closeBrowser, function (err) {
      // A failure while closing must not mask the error that got us here.
      return closeBrowser()
        .catch(function () {})
        .then(function () {
          throw err;
        });
    });
}

// Validate the command line before launching a browser, then run the crawl.
Promise.resolve()
  .then(function () {
    return crawl(readAsset());
  })
  .catch(function (err) {
    console.log(err);
    // Signal the failure to the calling shell as well.
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment