Skip to content

Instantly share code, notes, and snippets.

function pageFunction(context) {
// called on every page the crawler visits, use it to extract data from it
var $ = context.jQuery;
var result = [];
var content = $('meta[id="_bootstrap-layout-init"]').attr("content");
var api_key = JSON.parse(content).api_config.key;
var listId = context.request.url.match(/[^\/]+$/g);
var getReviewData = function(offset) {
function pageFunction(context) {
var PAGES = 100; // each page has 40 reviews
// called on every page the crawler visits, use it to extract data from it
var $ = context.jQuery;
var result = [];
var extractData = function(page) {
if( page < PAGES ) {
var api = "https://play.google.com/store/getreviews?authuser=0";
$.ajax({
url: api,
function pageFunction(context) {
// called on every page the crawler visits, use it to extract data from it
var $ = context.jQuery;
if (context.request.label === 'start') {
context.skipOutput();
var count = parseInt($('.count').text());
for(var i=0; i<=count; i+=20) {
context.enqueuePage("http://www.topshop.com/webapp/wcs/stores/servlet/CatalogNavigationAjaxSearchResultCmd?storeId=12556&catalogId=33057&langId=-1&dimSelected=%2Fen%2Ftsuk%2Fcategory%2Fclothing-427%2FN-82zZdgl%3FNo%3D" + i + "%26Nrpp%3D20%26siteId%3D%252F12556%26categoryId%3D203984");
}
} else {
function pageFunction(context) {
// called on every page the crawler visits, use it to extract data from it
var $ = context.jQuery;
if (context.request.label === 'detail') {
context.skipLinks();
// return data from internal JS variable
return property;
} else {
context.skipOutput();
// enqueue next pages in pagination
// originally from https://kb.apify.com/tips-and-tricks/scraping-data-from-websites-using-schemaorg-microdata
function schemaOrgParser() {
/**
 * Picks a value for an element by trying several sources in a fixed
 * priority order: the "content" attribute, the text content, the "src"
 * attribute, then the "href" attribute.
 *
 * @param {*} elem - anything accepted by the `$` wrapper in scope.
 * @returns {*} the first truthy candidate, or null when every candidate is falsy.
 */
var extractValue = function(elem) {
    // Readers are listed in priority order; each one is only invoked if
    // all earlier readers produced a falsy value.
    var readers = [
        function(wrapped) { return wrapped.attr("content"); },
        function(wrapped) { return wrapped.text(); },
        function(wrapped) { return wrapped.attr("src"); },
        function(wrapped) { return wrapped.attr("href"); }
    ];
    for (var idx = 0; idx < readers.length; idx++) {
        var candidate = readers[idx]($(elem));
        if (candidate) {
            return candidate;
        }
    }
    return null;
};
var addProperty = function(item,propName,value) {
if( typeof(value)==='string' )
value = value.trim();
if( Array.isArray(item[propName]) )