Skip to content

Instantly share code, notes, and snippets.

@aichholzer
Last active August 29, 2015 14:21
Show Gist options
  • Save aichholzer/dc2e1837754eeadc3bb0 to your computer and use it in GitHub Desktop.
Save aichholzer/dc2e1837754eeadc3bb0 to your computer and use it in GitHub Desktop.
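/**
* Simplest web page scrape: load a page, count its links and list its images.
*
* Prerequisites (PhantomJS via MacPorts, CasperJS via npm):
*   sudo port install phantomjs
*   npm install -g casperjs
*/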
var casper = require('casper').create(),
    // Filled in by the casper.start() callback once the page has loaded.
    links = [],
    images = [],

    /**
     * Collects the `href` attribute of every <a> element in the document.
     *
     * Reads the global `document`, so it must execute in a page (browser)
     * context rather than in the CasperJS script context.
     *
     * @returns {Array} One entry per anchor, in document order. Values are
     *     the raw attribute text (relative URLs are not resolved); an anchor
     *     without an `href` attribute yields null.
     */
    parseLinks = function parseLinks() {
        var anchors = document.querySelectorAll('a');
        // querySelectorAll returns a NodeList, not an Array, so borrow map().
        return Array.prototype.map.call(anchors, function (anchor) {
            return anchor.getAttribute('href');
        });
    },

    /**
     * Collects the `src` attribute of every <img> element in the document.
     *
     * Reads the global `document`, so it must execute in a page (browser)
     * context rather than in the CasperJS script context.
     *
     * @returns {Array} One entry per image, in document order. Values are
     *     the raw attribute text; an image without a `src` attribute yields
     *     null.
     */
    parseImages = function parseImages() {
        var pictures = document.querySelectorAll('img');
        // querySelectorAll returns a NodeList, not an Array, so borrow map().
        return Array.prototype.map.call(pictures, function (picture) {
            return picture.getAttribute('src');
        });
    };
// Open the target page, scrape it once it has loaded, print a summary, then quit.
casper.start('http://amazon.com', function () {
    // evaluate() runs each helper inside the loaded page and hands its return
    // value back to this script.
    // NOTE(review): evaluate() presumably returns null when the page function
    // throws; fall back to an empty list so the report below cannot crash on
    // `.length` / `.join` — confirm against the CasperJS docs.
    links = this.evaluate(parseLinks) || [];
    images = this.evaluate(parseImages) || [];

    this.echo(links.length + ' links found:');
    // Listing the links themselves is disabled; only the count is reported.
    //this.echo(' - ' + links.join('\n - ')).exit();
    this.echo(images.length + ' images found:');
    this.echo(' - ' + images.join('\n - '));

    // Terminate explicitly once the report has been printed.
    this.exit();
});

// Nothing queued above executes until run() is called.
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment