Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@mtrunkat
Last active March 5, 2020 12:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtrunkat/61ab1486254316138b4e1f813d5642ac to your computer and use it in GitHub Desktop.
Save mtrunkat/61ab1486254316138b4e1f813d5642ac to your computer and use it in GitHub Desktop.
/**
 * Crawler page function: scrapes every `.athing` post on the current page and
 * merges the scores into the result accumulated over previously crawled pages.
 *
 * The selectors (`.athing`, `.storylink`, `.rank`, `.morelink`) look like
 * Hacker News listing markup -- NOTE(review): confirm against the target site.
 *
 * @param {Object} context - Crawler context. Reads `context.jQuery` and
 *   `context.request.referrer`; calls `context.enqueuePage()` and
 *   `context.skipOutput()` when a "more" link is present.
 * @returns {Object} Map of story link href -> score, where the score is
 *   `101 - <displayed rank>`. When a previous result exists, that same object
 *   is extended in place and returned.
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var posts = $('.athing').toArray(); // All posts as array of DOM elements
    var $moreLink = $('.morelink'); // Link to next page

    // If the crawler is scraping the 2nd, 3rd, ... page then
    // context.request.referrer.pageFunctionResult contains the result from
    // previous pages. Fall back to an empty object both when there is no
    // referrer and when the referrer carries no result; otherwise the
    // reduce() below would start from `undefined` and throw on first write.
    var referrer = context.request.referrer;
    var prevResult = (referrer && referrer.pageFunctionResult) || {};

    if ($moreLink.length) {
        // Enqueue next page to be crawled.
        context.enqueuePage({ url: $moreLink.prop('href') });

        // If there is a link to another page then we don't want to output
        // the result of this page, in order to merge it with the next one.
        context.skipOutput();
    }

    // Scrape the data and merge them with previous results.
    return posts.reduce(function (result, el) {
        var $el = $(el);
        var link = $el.find('.storylink').attr('href');

        // A post without a story link has no usable key; skip it instead of
        // recording it under the literal key "undefined".
        if (!link) {
            return result;
        }

        // Explicit radix so the rank text is always parsed as decimal.
        var rank = 101 - parseInt($el.find('.rank').text(), 10);
        result[link] = rank;
        return result;
    }, prevResult);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment