Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
newbie exercise with PhantomJS and web scraping
#! /bin/env phantomjs
/*
Teaching myself:
- [hopefully] clean/readable JavaScript coding
- PhantomJS to render a page from command-line
- using CSS selector to scrape an element value from DOM
*/
// Command line: args[0] is the script name, args[1] the GitHub username,
// args[2] an optional CSS selector evaluated relative to the profile vcard.
var SYS = require('system');

// A username is mandatory; without one, print the usage line and request exit status 1.
if (SYS.args.length < 2) {
    console.log('usage: ' + SYS.args[0] + ' <Github Username> [CSS Selector]');
    phantom.exit(1);
}

var user = SYS.args[1];

// Selector to scrape. When none is given, default to the profile's full name.
// Alternative for the e-mail address:
//   'ul.vcard-details > li.vcard-detail > a.email.js-obfuscate-email'
var query;
if (SYS.args.length < 3) {
    query = 'h1.vcard-names > span.vcard-fullname'; // Full Name
} else {
    query = SYS.args[2];
}

// Page object used to load and inspect the profile.
var WP = require('webpage').create();
//======================================================================
//----------------------------------------------------------------------
/*
Load-finished callback: scrapes one element from the loaded profile page,
prints its innerHTML, and exits.

status - load result reported by PhantomJS; anything other than 'success'
         is logged as an error and the script exits with status 1.

The element is located with the vcard container selector below, narrowed by
the global `query` when it is non-empty. An empty line is printed when no
element matches.
*/
WP.onLoadFinished = function(status) {
    if (status !== 'success') {
        console.log('ERROR: ' + status);
        phantom.exit(1);
        // phantom.exit() only schedules termination; return so we do not go on
        // to scrape a page that failed to load.
        return;
    }

    // Path from the document root down to the profile vcard container.
    var vcard = [
        'html:root',
        'body.logged_out',
        'div.wrapper',
        'div.site.clearfix',
        'div#site-container',
        'div.container',
        'div.columns.profilecols.js-username',
        'div.column.one-fourth.vcard'
    ].join(' > ');

    // An empty query means "the vcard container itself".
    var selector = query ? vcard + ' > ' + query : vcard;

    // Executed inside the page by WP.evaluate, so it must be self-contained:
    // it can only use its argument and the page's own globals.
    function getValue(cssSelector) {
        var element = document.querySelector(cssSelector);
        // querySelector yields null when nothing matches; test the element and
        // the property's type instead of hasOwnProperty, which throws on null
        // and depends on where the engine defines innerHTML.
        if (element && typeof element.innerHTML === 'string') {
            return element.innerHTML;
        }
        return '';
    }

    var value = WP.evaluate(getValue, selector);
    console.log(value);
    phantom.exit();
};
//======================================================================
// Start loading the profile page. The username comes straight from the command
// line, so percent-encode it to keep characters such as '/', '?' or '#' from
// changing which URL is requested.
WP.open('https://github.com/' + encodeURIComponent(user));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment