Skip to content

Instantly share code, notes, and snippets.

Created February 19, 2015 02:58
Show Gist options
  • Save anonymous/23fb51e3d79286cc6702 to your computer and use it in GitHub Desktop.
Save anonymous/23fb51e3d79286cc6702 to your computer and use it in GitHub Desktop.
var system = require('system'), fs=require('fs'), webpage=require('webpage');
// Root of the hash-bang routes to crawl, and the directory snapshots are
// written under. Both are used as plain string prefixes by handle_page.
var base_url = "http://www2.menu.com.do/#!/",
    base_output = "snapshots/";

// Read the business list from disk; only its `result` member is consumed
// (one entry at a time, by next_business).
var content = fs.read('businesses.json'),
    content_obj = JSON.parse(content),
    businesses = content_obj.result;
// Queue of routes to snapshot, seeded with the fixed (non-business) pages.
// `url` is the route appended to base_url and `output` is the file path
// appended to base_output (both by handle_page). next_business appends to
// this array and next_page consumes it from the front.
var pages = (function () {
    // Most routes are stored under their own name plus ".html".
    function mirrored(route) {
        return { url: route, output: route + '.html' };
    }

    var queue = [
        { url: '', output: 'index.html' },
        mirrored('intersticial'),
        // '*' is swapped for '_' in the file name only.
        { url: 's/a:*', output: 's/a:_.html' }
    ];

    // One "starts with" listing per initial: '0' first, then a-z.
    var initials = '0abcdefghijklmnopqrstuvwxyz';
    for (var i = 0; i < initials.length; i++) {
        queue.push(mirrored('s/sw:' + initials.charAt(i)));
    }

    return queue.concat([
        mirrored('tipos-de-comida'),
        // Routes with a trailing slash drop it in the file name.
        { url: 'menus-del-dia/', output: 'menus-del-dia.html' },
        { url: 'actividades/', output: 'actividades.html' },
        { url: 'ofertas/', output: 'ofertas.html' },
        mirrored('s/c:catering'),
        mirrored('s/c:panaderia,reposteria'),
        mirrored('terminos'),
        mirrored('privacidad'),
        mirrored('contacto')
    ]);
}());
/**
 * Snapshot one route: open it in a fresh PhantomJS page, poll until the
 * document's <body> carries data-status="ready", write the document's
 * outerHTML to disk, then schedule next_page().
 *
 * next_page() is scheduled exactly once per call, whether the page loaded,
 * failed to load, timed out, or could not be written, so one bad page cannot
 * stall the crawl. The page object is always closed before moving on.
 *
 * @param {string} url       Route, appended to base_url.
 * @param {string} output    File path, appended to base_output.
 * @param {number} [max_wait] Milliseconds to wait for the ready flag before
 *                            giving up on the page. Defaults to 30000.
 */
function handle_page(url, output, max_wait){
    var poll_every = 100;
    var static_asset = /\.(?:css|jpe?g|png)(?:\?|$)/i;
    var ad_frame = /afr\.php/i;
    var target = base_url + url;
    var destination = base_output + output;
    var timer = null;
    var finished = false;
    var page;

    if (typeof max_wait !== 'number') {
        max_wait = 30000;
    }

    // Stop polling, release the page and continue with the queue (once only).
    function finish() {
        if (finished) {
            return;
        }
        finished = true;
        if (timer !== null) {
            clearInterval(timer);
            timer = null;
        }
        page.close();
        setTimeout(next_page, 0);
    }

    // Write the snapshot, creating the parent directory tree first. A write
    // error is logged rather than thrown so the crawl still continues.
    function save(html) {
        var slash = destination.lastIndexOf('/');
        console.log('Writing to file...');
        try {
            if (slash > 0) {
                fs.makeTree(destination.substring(0, slash));
            }
            fs.write(destination, html, 'w');
            console.log('Done.');
        } catch (err) {
            console.log('Failed to write ' + destination + ': ' + err);
        }
    }

    console.log('Fetching ' + target);
    page = webpage.create();

    // Skip stylesheets, JPEG/PNG images and the afr.php ad frame. The request
    // metadata handed to this callback has no 'Content-Type' key, so the
    // decision is made on the URL. The fragment is dropped first so that a
    // hash-bang route can never be mistaken for an asset.
    page.onResourceRequested = function(requestData, request) {
        var resource = String(requestData.url).split('#')[0];
        if (static_asset.test(resource) || ad_frame.test(resource)) {
            request.abort();
        }
    };

    page.open(target, function (status) {
        var started;
        // Ignore a repeated callback once polling has started or ended.
        if (finished || timer !== null) {
            return;
        }
        if (status !== 'success') {
            console.log('Failed. Status:' + status);
            finish();
            return;
        }
        console.log('Success.');
        console.log('Waiting for page to be ready...');
        started = Date.now();
        timer = setInterval(function () {
            // Runs inside the page; yields the markup only once it is ready.
            var html = page.evaluate(function () {
                var body = document.getElementsByTagName('body')[0];
                if (body && body.getAttribute('data-status') === 'ready') {
                    return document.getElementsByTagName('html')[0].outerHTML;
                }
            });
            if (html) {
                console.log('Ready.');
                save(html);
                finish();
            } else if (Date.now() - started >= max_wait) {
                console.log('Timed out waiting for page to be ready.');
                finish();
            }
        }, poll_every);
    });
}
/**
 * Take one entry off `businesses` and queue its pages on `pages`, then
 * schedule itself again. When the list is exhausted it logs the queue size
 * and schedules next_page() instead.
 *
 * Businesses flagged `is_hidden` or `deleted` contribute no pages. Otherwise
 * the profile page is always queued, and each sub-page (menu, mapa, ofertas,
 * actividades, comentarios) is queued unless its `*_hidden` flag is set.
 *
 * `output` values are stored relative to base_output, like the static
 * entries in `pages`: handle_page prepends base_output itself, so adding it
 * here as well would produce a doubled "snapshots/snapshots/..." path.
 */
function next_business() {
    var business = businesses.shift();
    var pages_len = pages.length;
    var info, root, sections, i;

    if (!business) {
        console.log("Done.");
        console.log("Pages: " + pages_len);
        setTimeout(next_page, 0);
        return;
    }

    info = business.Business;
    console.log("Business: " + info.name + " (" + info.vanity_name + ")");

    if (!(info.is_hidden || info.deleted)) {
        root = 'n/' + info.vanity_name;

        // Profile
        pages.push({ url: root + '/', output: root + '.html' });

        // Optional sub-pages: route suffix, file name, and the flag that hides it.
        sections = [
            { hidden: info.menu_hidden, route: '/menu', file: '/menu.html' },
            { hidden: info.venues_hidden, route: '/mapa', file: '/mapa.html' },
            { hidden: info.offers_hidden, route: '/ofertas/', file: '/ofertas.html' },
            { hidden: info.activities_hidden, route: '/actividades/', file: '/actividades.html' },
            { hidden: info.comments_hidden, route: '/comentarios', file: '/comentarios.html' }
        ];
        for (i = 0; i < sections.length; i++) {
            if (!sections[i].hidden) {
                pages.push({
                    url: root + sections[i].route,
                    output: root + sections[i].file
                });
            }
        }
    }

    console.log('+' + (pages.length - pages_len) + ' pages');
    setTimeout(next_business, 0);
}
// Entry point: expand the business list into page entries first;
// next_business schedules next_page once the list is empty.
next_business();
/**
 * Take the entry at the front of `pages` and pass its url/output to
 * handle_page. When the queue is empty, log completion and exit PhantomJS.
 */
function next_page() {
    var entry = pages.shift();
    if (entry) {
        handle_page(entry.url, entry.output);
        return;
    }
    console.log('Done.');
    phantom.exit();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment