Skip to content

Instantly share code, notes, and snippets.

@jakubbalada
Created January 31, 2018 16:22
Show Gist options
  • Save jakubbalada/033ddff986b76a08e8a3c51e2843b15a to your computer and use it in GitHub Desktop.
Save jakubbalada/033ddff986b76a08e8a3c51e2843b15a to your computer and use it in GitHub Desktop.
/**
 * Crawler page function that collects Google Play reviews for one app.
 *
 * Pages of reviews are requested one after another from the `getreviews`
 * endpoint with `context.jQuery.ajax`. Each `.single-review` element found in
 * a response is converted into a record
 * `{ author, date, rating, title, text }` (all strings) and appended to the
 * result list. When all pages were processed, a request failed, or a response
 * could not be parsed, the reviews collected so far are passed to
 * `context.finish()`.
 *
 * NOTE(review): intentionally kept in ES5 syntax (var / function expressions)
 * like the original; presumably the crawler runtime may not support newer
 * syntax - confirm before modernizing.
 *
 * @param {Object} context - Crawler context; this function uses
 *     `context.jQuery`, `context.willFinishLater()` and `context.finish()`.
 */
function pageFunction(context) {
    var PAGES = 100; // each page has 40 reviews
    var API_URL = "https://play.google.com/store/getreviews?authuser=0";
    var APP_ID = "com.instagram.android";
    var $ = context.jQuery;
    var result = [];

    // Converts one `.single-review` element into a plain review record.
    var parseReview = function(element) {
        var $review = $(element);
        var $title = $review.find('.review-title');
        // .attr() returns undefined when the element or attribute is missing,
        // so it must be checked before trimming.
        var rating = $review.find('.star-rating-non-editable-container').attr('aria-label');
        return {
            author: $review.find('.author-name').text().trim(),
            date: $review.find('.review-date').text().trim(),
            rating: typeof rating === 'string' ? rating.trim() : '',
            title: $title.text().trim(),
            // The review body is the bare text nodes next to the title element.
            text: $title.parent().contents()
                .filter(function() {
                    return this.nodeType === 3;
                }).text().trim()
        };
    };

    // Parses one raw response body and appends every review in it to `result`.
    var collectReviews = function(data) {
        // The JSON payload starts at the first "[["; whatever precedes it is skipped.
        var html = JSON.parse(data.substring(data.indexOf("[[")))[0][2];
        $($.parseHTML("<div>" + html + "</div>")).find('.single-review').each(function() {
            result.push(parseReview(this));
        });
    };

    // Fetches page `page`, then continues with the next one until PAGES is reached.
    var extractData = function(page) {
        if (page >= PAGES) {
            context.finish(result);
            return;
        }
        $.ajax({
            url: API_URL,
            type: 'POST',
            data: "reviewType=0&pageNum=" + page + "&id=" + APP_ID + "&reviewSortOrder=0&xhr=1",
            dataType: 'text'
        })
        .done(function(data) {
            console.log("Reviews data fetched");
            try {
                collectReviews(data);
            } catch (err) {
                // An exception escaping this callback would mean finish() is
                // never called; hand over what has been collected instead.
                console.log("Reviews response could not be parsed: " + err);
                context.finish(result);
                return;
            }
            extractData(page + 1);
        })
        .fail(function() {
            console.log("Reviews API call failed");
            // Keep the reviews gathered from earlier pages instead of dropping them.
            context.finish(result);
        });
    };

    // Presumably signals that the result is delivered asynchronously through
    // context.finish(); called before any work starts so it always precedes finish().
    context.willFinishLater();
    extractData(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment