Skip to content

Instantly share code, notes, and snippets.

@ianchanning
Last active May 16, 2016 09:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ianchanning/fbdb99a6bdfa61d34ab283a31335af3e to your computer and use it in GitHub Desktop.
Save ianchanning/fbdb99a6bdfa61d34ab283a31335af3e to your computer and use it in GitHub Desktop.
Bowie discography scraping
// CasperJS instance that drives every navigation step of this script.
var casper = require('casper').create();

// URLs returned by getLinks(); assigned inside the first `casper.then` step.
var links = [];
/**
 * Collects the URLs of the links found in the discography tables.
 *
 * This function is handed to `casper.evaluate`, so it reads the `document`
 * of the loaded page. Keep it self-contained: it must not rely on variables
 * from the surrounding script.
 *
 * Matches anchors nested in italic + bold markup inside `table.wikitable`
 * cells (on the discography page these appear to be the album titles).
 *
 * @returns {string[]} Absolute URLs, in document order. Anchors without an
 *   `href` are skipped; site-relative hrefs are prefixed with the English
 *   Wikipedia origin; absolute and protocol-relative hrefs are kept intact.
 */
function getLinks() {
  var origin = 'https://en.wikipedia.org';
  var anchors = document.querySelectorAll('div#mw-content-text table.wikitable tbody tr td i b a');
  var urls = [];

  Array.prototype.forEach.call(anchors, function (anchor) {
    var href = anchor.getAttribute('href');

    // An anchor with no (or an empty) href has nothing to open; prefixing it
    // would yield a bogus URL such as "https://en.wikipedia.orgnull".
    if (!href) {
      return;
    }

    if (/^https?:\/\//i.test(href)) {
      // Already absolute: prefixing would corrupt it.
      urls.push(href);
    } else if (href.indexOf('//') === 0) {
      // Protocol-relative (e.g. "//commons.wikimedia.org/..."): add a scheme.
      urls.push('https:' + href);
    } else {
      // Site-relative (e.g. "/wiki/Low_(album)"): same result as before.
      urls.push(origin + href);
    }
  });

  return urls;
}
// Step 1: load the David Bowie discography page.
casper.start('https://en.wikipedia.org/wiki/David_Bowie_discography');

// Step 2: gather the link URLs from the loaded page, then queue one
// navigation step per URL that prints "<page title> - <url>".
casper.then(function () {
  // getLinks is evaluated against the loaded page; its result is kept in the
  // script-level `links` variable.
  links = this.evaluate(getLinks);

  casper.each(links, function (scraper, albumUrl) {
    scraper.thenOpen(albumUrl, function () {
      var pageTitle = this.getTitle();
      this.echo(pageTitle + " - " + albumUrl);
    });
  });
});

// Execute the queued steps.
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment