// Source: GitHub Gist ganglio/4276763, by @ganglio, created December 13, 2012 14:39.
// CasperJS: Facebook photos scraper
// CasperJS instance, created with verbose output at the 'info' log level.
var casper = require('casper').create({
    logLevel: 'info',
    verbose: true
});

// File names of the scraped photos; assigned by the scraping step and read
// by the final printing step.
var images = [];

// File-system module handle. It is not referenced by the visible part of
// this script.
var fs = require("fs");

/**
 * Configuration here
 */
// Credentials typed into the login form.
var login_username = "username";
var login_password = "password";
// Profile whose photo index page is opened and scraped.
var scraped_username = "username2";
/**
 * Everything starts here!
 * The mobile version of Facebook is used because its DOM is much simpler to scrape.
 *
 * This first step opens the mobile login page, enlarges the viewport and
 * submits the login form with the configured credentials.
 */
// The login page is requested over HTTPS (like the photo page later on) because
// credentials are typed into it.
casper.start('https://m.facebook.com', function() {
    // The pretty HUGE viewport allows for roughly 1200 images.
    // If you need more you can either resize the viewport or scroll the page
    // down to load more DOM (probably the best approach).
    this.viewport(2048, 4096);

    // Passing `true` as the third argument submits the form once it is filled.
    this.fill('form#login_form', {
        'email': login_username,
        'pass': login_password
    }, true);
});
/**
 * Opens the photo index of the scraped profile and collects the file name of
 * every photo thumbnail into the script-level `images` array.
 */
casper.thenOpen("https://m.facebook.com/" + scraped_username + "?v=photos", function() {
    // We wait four seconds so that the page has loaded (the lazy load is
    // amazing for actual users but bots don't like it ;)
    this.wait(4000, function() {
        // Just to be sure we are on the correct page.
        this.capture("photo_index.png");

        // Then we fetch the images. The function below runs inside the page,
        // so it must be self-contained and may only return plain data.
        var found = this.evaluate(function() {
            var nodes = document.querySelectorAll(".timeline.photos i.img");
            // The value may be serialised with or without quotes around the
            // URL, e.g. url(x.jpg) or url("x.jpg"); capture only the URL itself.
            var urlPattern = /url\(\s*["']?([^"')]+)["']?\s*\)/;
            var names = [];
            for (var i = 0; i < nodes.length; i++) {
                var hit = nodes[i].style.backgroundImage.match(urlPattern);
                // Skip elements without a background image instead of letting
                // one of them abort the whole collection with a TypeError.
                if (hit) {
                    // Keep only the last path segment, i.e. the file name.
                    names.push(hit[1].split("/").pop());
                }
            }
            return names;
        });

        // Guard against a non-array result (e.g. when the page-side function
        // failed) so later steps always see an array.
        images = Array.isArray(found) ? found : [];
    });
});
/**
 * Final step: print the full URL of every collected image file name.
 */
casper.then(function() {
    // Once done we write the image URLs to screen.
    // I'm still working on a proper way to download the images locally. Any idea?
    var cdnPrefix = "https://fbcdn-photos-a.akamaihd.net/hphotos-ak-ash4/s720x720/";

    this.each(images, function(self, fname) {
        // `self` is the casper instance handed to the callback by each().
        self.echo(cdnPrefix + fname);
    });
});

// Run the steps queued above.
casper.run();
// (GitHub page footer: sign in on GitHub to comment on this gist.)