Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Bowie discography scraping
// CasperJS instance that drives the whole scrape.
var casper = require('casper').create();

// Album page URLs; filled in by the first navigation step once the
// discography page has been evaluated.
var links = [];
/**
 * Collects the URLs of the anchors found inside bold-italic cells of the
 * wikitable(s) in the article body.
 *
 * This function is handed to `evaluate`, so it is presumably executed inside
 * the loaded page rather than in the script's own scope; it is therefore kept
 * self-contained (no references to outer variables) and written in ES5.
 *
 * Href handling:
 *   - missing/empty href        -> skipped
 *   - already absolute (scheme) -> used as-is
 *   - protocol-relative (//...) -> prefixed with "https:"
 *   - anything else             -> prefixed with the English Wikipedia origin
 *
 * @returns {string[]} URLs in document order.
 */
function getLinks() {
    var wikiOrigin = 'https://en.wikipedia.org';
    var anchors = document.querySelectorAll('div#mw-content-text table.wikitable tbody tr td i b a');
    var urls = [];

    Array.prototype.forEach.call(anchors, function (anchor) {
        var href = anchor.getAttribute('href');

        // An anchor without an href has nothing to navigate to.
        if (!href) {
            return;
        }

        if (/^[a-z][a-z0-9+.\-]*:/i.test(href)) {
            // Already carries a scheme; prefixing the origin would corrupt it.
            urls.push(href);
        } else if (href.indexOf('//') === 0) {
            // Protocol-relative link to another host.
            urls.push('https:' + href);
        } else {
            urls.push(wikiOrigin + href);
        }
    });

    return urls;
}
// Open the discography page; the steps below are queued after this navigation.
casper.start('https://en.wikipedia.org/wiki/David_Bowie_discography');

casper.then(function () {
    // Gather the album page URLs by running getLinks via evaluate().
    links = this.evaluate(getLinks);

    // Nothing to visit: either the selector matched no anchors or the
    // evaluation did not hand back a list (NOTE(review): evaluate() may
    // return null on failure — confirm against the CasperJS docs).
    if (!links || links.length === 0) {
        links = [];
        this.echo('No album links found on the discography page.', 'WARNING');
        return;
    }

    // Queue one navigation step per album page and print "<title> - <url>"
    // once each page has been opened.
    this.each(links, function (self, link) {
        self.thenOpen(link, function () {
            this.echo(this.getTitle() + " - " + link);
        });
    });
});

// Execute the queued steps.
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment