Skip to content

Instantly share code, notes, and snippets.

@egm0121
Created November 13, 2012 15:16
Show Gist options
  • Save egm0121/4066265 to your computer and use it in GitHub Desktop.
Save egm0121/4066265 to your computer and use it in GitHub Desktop.
Demo: PhantomJS scraping + web server
/**
 * Maps each public endpoint name (the first path segment of an incoming
 * request) to the base URL that the matching search strategy opens.
 */
var endpoints = {
  'demo': 'https://www.google.it/search?hl=it&output=search'
};

// PhantomJS built-in modules.
var system = require('system');
var server = require('webserver').create();

/**
 * Runs `func` inside `page`, forwarding any extra arguments to it.
 *
 * `func` is converted to source text and the extra arguments are embedded via
 * JSON.stringify, so `func` cannot close over variables of this script and
 * the arguments must be JSON-serialisable.
 *
 * @param {Object} page - Object exposing an `evaluate(source)` method.
 * @param {Function} func - Function to run in the page.
 * @param {...*} [args] - Extra values passed to `func`.
 * @returns {*} Whatever `page.evaluate` returns.
 */
var evaluate = function (page, func) {
  var args = [].slice.call(arguments, 2);
  var fn = "function() { return (" + func.toString() + ").apply(this, " + JSON.stringify(args) + ");}";
  return page.evaluate(fn);
};

/**
 * Scraping strategies keyed by endpoint name. Each strategy has the signature
 * `(endpoint, opts, callback)` and reports its result through `callback`.
 */
var searchStrategy = {
  /**
   * Opens `endpoint` with `opts.name` appended as the `q` parameter and passes
   * the text of the first element matching `.g a` to `callback`.
   *
   * `callback` is invoked exactly once. It receives `null` when the page fails
   * to load, when nothing matches the selector, or when evaluation throws.
   *
   * @param {string} endpoint - Base search URL (already contains a query string).
   * @param {Object} opts - Search options; only `opts.name` is read here. It is
   *   appended verbatim, so it must already be URL-encoded.
   * @param {Function} callback - Receives the link text or `null`.
   */
  'demo': function (endpoint, opts, callback) {
    var page = require('webpage').create();
    var handled = false;

    // Attach the handler before starting the load so no event can be missed.
    page.onLoadFinished = function (status) {
      // onLoadFinished may fire more than once; only the first event counts,
      // otherwise the caller would be called back repeatedly.
      if (handled) { return; }
      handled = true;

      var result = null;
      try {
        if (status === 'success') {
          result = evaluate(page, function () {
            var link = document.querySelector('.g a');
            return link ? link.innerText : null;
          });
        }
      } catch (err) {
        console.log('demo strategy: page evaluation failed: ' + err);
        result = null;
      }

      // Release the page, deferred so it is not destroyed while its own
      // callback is still running.
      setTimeout(function () { page.close(); }, 0);

      callback(result === undefined ? null : result);
    };

    page.open(endpoint + '&q=' + opts.name);
  }
};

/**
 * Small helpers for picking URLs apart.
 */
var Query = {
  /**
   * Returns the raw (still URL-encoded) value of query parameter `name` in `url`.
   *
   * @param {string} name - Parameter name; matched literally.
   * @param {string} url - URL or query string to search.
   * @returns {string|number} The value, or `0` when the parameter is missing
   *   or empty.
   */
  param: function (name, url) {
    // Escape regex metacharacters so the name is matched literally.
    var escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var results = new RegExp('[\\?&]' + escapedName + '=([^&#]*)').exec(url);
    if (!results) { return 0; }
    return results[1] || 0;
  },

  /**
   * Splits URL `s` into its components.
   *
   * @param {string} s - URL to parse.
   * @returns {Array|null} Match array with [1] scheme, [2] slashes, [3] host,
   *   [4] port, [5] path (without the leading slash), [6] query, [7] fragment;
   *   `null` when `s` does not match.
   */
  url: function (s) {
    return s.match(/^(?:([A-Za-z]+):)?(\/{0,3})([0-9.\-A-Za-z]+)(?::(\d+))?(?:\/([^?#]*))?(?:\?([^#]*))?(?:#(.*))?$/);
  }
};
console.log('starting PhantomJS Scrape Server >-(*,*)-< ...');
console.log('http://host:8081/demo');

/**
 * HTTP entry point, listening on port 8081.
 *
 * Routes on the first path segment of the request URL:
 *   - `status`             -> 200, plain-text body `OK`.
 *   - a configured endpoint -> runs the matching search strategy with the
 *                             `name` query parameter and answers 200 with
 *                             `{"result": <text or null>}`; answers 400 when
 *                             `name` is missing or the strategy throws.
 *   - anything else        -> 404 with `{"error": "Endpoint Not Found"}`.
 *
 * The handler returns `true` for the first two routes and `undefined` for 404.
 */
server.listen(8081, function (request, response) {
  console.log('request started...');

  var finished = false;

  // Sends `payload` as JSON and closes the response. Later calls are ignored so
  // a strategy that calls back more than once cannot write to a closed response.
  var sendJson = function (statusCode, payload) {
    if (finished) { return; }
    finished = true;
    response.statusCode = statusCode;
    response.write(JSON.stringify(payload));
    response.close();
  };

  response.headers = {
    "Content-Type": "application/json",
    "Vary": "Accept-Encoding"
  };
  response.statusCode = 404;

  // request.url is only path + query, so prefix a dummy origin for Query.url.
  var urlParts = Query.url('http://example.com' + request.url);
  var parts = (urlParts && urlParts[5]) ? urlParts[5].split('/') : [];
  var endpointName = parts[0];

  if (endpointName === 'status') {
    // The body is the bare word OK, which is not JSON, so label it as text.
    response.headers = {
      "Content-Type": "text/plain",
      "Vary": "Accept-Encoding"
    };
    response.statusCode = 200;
    response.write('OK');
    response.close();
    return true;
  }

  // Own-property checks: the `in` operator would also accept inherited keys
  // such as "toString" or "constructor" as endpoint names.
  var hasOwn = Object.prototype.hasOwnProperty;
  if (hasOwn.call(endpoints, endpointName) && hasOwn.call(searchStrategy, endpointName)) {
    var name = Query.param('name', request.url);
    if (!name) {
      sendJson(400, {
        error: 'bad request / error processing the request',
        debug: 'error: missing search params'
      });
      return true;
    }

    try {
      var params = {
        'name': name,
        // SECURITY: credentials are read from the environment instead of being
        // committed to source. No strategy in this file reads them.
        'corpUser': system.env.CORP_USER || '',
        'corpPwd': system.env.CORP_PWD || ''
      };
      searchStrategy[endpointName](endpoints[endpointName], params, function (data) {
        // Wrap the scraped text so the body is valid JSON, as the
        // Content-Type header promises; a missing result becomes null.
        sendJson(200, { result: (data === undefined || data === null) ? null : data });
      });
    } catch (err) {
      sendJson(400, {
        error: 'bad request / error processing the request',
        debug: String(err)
      });
    }
    return true;
  }

  sendJson(404, { error: 'Endpoint Not Found' });
  return;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment