-
-
Save ekyfauzi/1ca464ce0a94d79238be855c1de38f07 to your computer and use it in GitHub Desktop.
Scraping a React App using PhantomJS and Cheerio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var phantom = require('phantom');
var Q = require('q');
var cheerio = require('cheerio');

// Module-level handles: `_page` is read by textPopulated(), and both handles
// are needed by the final cleanup step regardless of where the chain failed.
var _ph, _page, _outObj;
var url = ABSOLUTE_URL; // change here for your React app site

// Scrape pipeline: start PhantomJS, open the page, wait until the app has
// rendered, then parse the resulting HTML with cheerio and print one
// '<link|title>' entry per matched item.
phantom.create().then(ph => {
  _ph = ph;
  return _ph.createPage();
}).then(page => {
  _page = page;
  return _page.open(url);
}).then(status => {
  console.log(status);
  // Poll for up to 3 seconds until textPopulated() yields a truthy value.
  return waitState(textPopulated, 3);
}).then(() => {
  return _page.property('content');
}).then(content => {
  const $ = cheerio.load(content);
  const resultados = [];
  $('.item.panel.panel-default').each(function() {
    const title = $(this).find('.title').text();
    const link = $(this).find('a').attr('href');
    resultados.push('<' + link + '|' + title + '>');
  });
  // Emit the collected entries; previously they were built and then dropped.
  console.log(resultados.join('\n'));
}).catch(e => console.log(e)).then(() => {
  // Always release PhantomJS, including after a failure or a wait timeout;
  // otherwise the child process is left running. Either handle may still be
  // undefined if the chain failed before it was assigned.
  if (_page) {
    _page.close();
  }
  if (_ph) {
    _ph.exit();
  }
});
/**
 * State probe for waitState(): reports whether the `#app` element currently
 * contains rendered text.
 *
 * @returns {Promise<?string>} Resolves with the outer HTML of `#app` once it
 *   holds non-blank text, or with null while the element is missing or empty.
 */
function textPopulated() {
  // The callback is handed to `_page.evaluate`, so it is written in plain ES5
  // and only touches `document`.
  return _page.evaluate(function() {
    var root = document.querySelector('#app');
    // A missing root is "not ready yet", not an error.
    if (!root) {
      return null;
    }
    // An empty mount point such as <div id="app"></div> has a truthy
    // outerHTML, so check the text content instead.
    var text = root.textContent || '';
    return text.trim().length > 0 ? root.outerHTML : null;
  });
}
/**
 * Polls an asynchronous predicate until it yields a truthy value or the
 * time limit is exceeded.
 *
 * @param {function(): Promise<*>} state - Named function returning a promise;
 *   a truthy resolution means the state has been reached. Its `name` is used
 *   in log and error messages.
 * @param {number} [timeout] - Time limit in seconds; 20 seconds when omitted
 *   or 0.
 * @param {number} [interval] - Delay between polls in milliseconds; 50 when
 *   omitted or 0.
 * @returns {Promise<undefined>} Resolves once the state is reached. Rejects
 *   with an Error ('Timeout state: <name>') when the limit is exceeded, or
 *   with whatever error `state()` itself rejects with.
 */
function waitState(state, timeout, interval) {
  console.log('Start waiting for state: ' + state.name);
  var limitTime = timeout * 1000 || 20000;
  var pollDelay = interval || 50;
  var startTime = Date.now();

  // One polling step; reschedules itself through Q.delay until done.
  function wait() {
    return state().then(function(result) {
      if (result) {
        console.log('Reached state: ' + state.name);
        return undefined;
      }
      // The deadline is only checked after a failed probe, so a probe that
      // succeeds late still counts as success.
      if (Date.now() - startTime > limitTime) {
        throw new Error('Timeout state: ' + state.name);
      }
      return Q.delay(pollDelay).then(wait);
    });
  }

  return wait();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment