Skip to content

Instantly share code, notes, and snippets.

@mikeptweet
Created January 18, 2017 19:05
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikeptweet/f94fa2d029e8b61fe7cee4479515b0cf to your computer and use it in GitHub Desktop.
Save mikeptweet/f94fa2d029e8b61fe7cee4479515b0cf to your computer and use it in GitHub Desktop.
/**
 * scrapepage.js - PhantomJS script that loads a web page and runs an AlaSQL
 * query against its DOM elements.
 *
 * Usage: phantomjs.exe scrapepage.js url sql
 *
 * Every element in `document.all` is exposed to AlaSQL as a row of a table
 * named `document`, so element properties (nodeName, href, innerText, ...)
 * can be selected and filtered with SQL. The query result is printed to
 * stdout: objects/arrays as pretty-printed JSON, anything else as-is.
 *
 * NOTE: PhantomJS runs an ES5-era engine, so this file intentionally sticks
 * to `var` and function expressions.
 */
var args = require('system').args;
var webPage = require('webpage');

// Scripts injected into the target page: js-xlsx plus AlaSQL.
var ALASQL_BUNDLE_URL = "https://cdn.jsdelivr.net/g/js-xlsx@0.8.1(xlsx.core.min.js),alasql@0.3.5";

/**
 * Opens `url`, injects AlaSQL into the page, runs `sql` against the page's
 * elements, prints the outcome and terminates PhantomJS.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {string} sql - AlaSQL statement to run against the `document` table.
 */
function scrapePage(url, sql) {
    var page = webPage.create();

    page.open(url, function (status) {
        // Do not query a page that failed to load; report it and signal failure.
        if (status !== 'success') {
            console.log("ERROR: unable to load " + url + " (status: " + status + ")");
            phantom.exit(1);
            return;
        }

        page.includeJs(ALASQL_BUNDLE_URL, function () {
            // This function runs inside the page context; only `sql` is passed in.
            var result = page.evaluate(function (sql) {
                var response = "";
                try {
                    alasql("create table document");
                    // Copy the live element collection into a real array of rows.
                    alasql.tables.document.data = [].slice.call(document.all);
                    response = alasql(sql);
                } catch (e) {
                    // Report query/library errors as the result text.
                    response = e.message;
                }
                return response;
            }, sql);

            result = (typeof result === 'object') ? JSON.stringify(result, null, 2) : result;
            console.log(result);
            phantom.exit();
        });
    });
}

if (args.length < 3) {
    console.log("USAGE: phantomjs.exe scrapepage.js url sql\n");
    console.log('ie.: phantomjs.exe scrapepage.js "https://news.ycombinator.com/news" "select nodeName,href,innerText from document where className=\'storylink\'"');
    // Non-zero exit code so callers can detect the misuse.
    phantom.exit(1);
} else {
    // The else branch matters: phantom.exit() should not be relied on to stop
    // the current script synchronously, so the scrape must be skipped explicitly.
    scrapePage(args[1], args[2]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment