Skip to content

Instantly share code, notes, and snippets.

@mkhizeryounas
Last active April 20, 2018 17:41
Show Gist options
  • Save mkhizeryounas/7dd15ad30d2b8827f65754d0094499ac to your computer and use it in GitHub Desktop.
Save mkhizeryounas/7dd15ad30d2b8827f65754d0094499ac to your computer and use it in GitHub Desktop.
GetCeleb Apify Crawler
/**
 * Crawler page function: walks the gotceleb.com listing pages, follows every
 * post link found on them and collects the gallery image URLs of each post.
 *
 * The function is asynchronous. It calls `context.willFinishLater()` up front
 * and hands the collected data to `context.finish(result)` exactly once, after
 * every listing page AND every gallery page has either loaded or failed.
 *
 * NOTE(review): ES5 syntax (`var`, function expressions) is kept on purpose to
 * match the original code; presumably the crawler runtime does not support
 * newer syntax — confirm before modernising.
 *
 * @param {Object} context - Crawler context. This function reads
 *   `context.jQuery` and calls `context.willFinishLater()` and
 *   `context.finish(result)`.
 * @returns {undefined} Results are delivered through `context.finish` as an
 *   array of `{ pageUrl: string, imageUrl: string[] }` objects, in the order
 *   the posts were discovered.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var result = [];
    // Index of the last listing page to crawl (pages 0..lastPage are requested).
    var lastPage = 3;
    // Number of gallery requests that have been started but not yet settled.
    var pendingGalleries = 0;
    // True once every listing page has been requested and has settled.
    var listingsDone = false;
    // Guards against handing the result over more than once.
    var finished = false;

    // Hands the result over once nothing is left in flight.
    var finishIfDone = function () {
        if (finished || !listingsDone || pendingGalleries > 0) {
            return;
        }
        finished = true;
        console.log("Done process");
        context.finish(result);
    };

    // GETs `url`; exactly one of `onSuccess(body)` / `onError()` is invoked.
    var fetch = function (url, onSuccess, onError) {
        $.ajax({
            url: url,
            type: 'GET',
            success: function (res) {
                onSuccess(res);
            },
            error: function (jqXHR, textStatus) {
                // A failed request must still be reported to the caller,
                // otherwise the crawl would wait for it forever.
                console.log("Request failed: " + url + " (" + textStatus + ")");
                onError();
            }
        });
    };

    // Starts the gallery request for one post and fills `entry.imageUrl`.
    var collectImages = function (entry) {
        pendingGalleries += 1;
        var settle = function () {
            pendingGalleries -= 1;
            finishIfDone();
        };
        fetch(entry.pageUrl, function (galleryHtml) {
            $(galleryHtml).find('.gallery-item').each(function () {
                var src = $(this).find('img').attr('src');
                if (src) {
                    entry.imageUrl.push(src);
                }
            });
            settle();
        }, settle); // on failure the entry keeps an empty image list
    };

    // Crawls listing page `count`, then moves on to the next one.
    var extractData = function (count) {
        // Check the limit BEFORE requesting, so termination does not depend
        // on the page containing posts and no extra page is downloaded.
        if (count > lastPage) {
            listingsDone = true;
            finishIfDone();
            return;
        }
        var next = function () {
            extractData(count + 1);
        };
        fetch("http://www.gotceleb.com/page/" + count, function (listingHtml) {
            $(listingHtml).find('.post-inner').each(function () {
                var pageUrl = $(this).find('.post-title').find('a').attr('href');
                if (!pageUrl) {
                    // No link to follow; requesting an undefined URL is useless.
                    return;
                }
                console.log(pageUrl);
                var entry = {
                    pageUrl: pageUrl,
                    imageUrl: []
                };
                result.push(entry);
                collectImages(entry);
            });
            next();
        }, next); // a failed listing page is skipped, the crawl continues
    };

    // Announce the asynchronous completion before any work is started.
    context.willFinishLater();
    extractData(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment