Skip to content

Instantly share code, notes, and snippets.

Created April 4, 2018 17:25
Show Gist options
  • Save lathropd/cb57cbfe08c83708239083705ab3f3ef to your computer and use it in GitHub Desktop.
Save lathropd/cb57cbfe08c83708239083705ab3f3ef to your computer and use it in GitHub Desktop.
paged results single page scraper created by lathropd -
let request = require('request')
let rp = require('request-promise-native');
let cheerio = require('cheerio');
let d3 = require('d3');
let fs = require('fs');
let sleep = require('thread-sleep');
// NOTE(review): presumably the URL of the first results page; it is an empty
// string in this copy — confirm the intended value before running.
var mainUrl = "";
// Module-level array that scrape() hands to d3.csvFormat when no next page is found.
var data = [];
/**
 * Loads one page of results markup with cheerio and builds a record object for
 * each `div.result` element found in it, then looks for a next-page link.
 *
 * NOTE(review): this block appears truncated. The object literal opened for
 * `d`, the `jobs.each` callback and the function body are never closed, and no
 * visible line appends `d` to `data` — restore the missing lines from the
 * original file before running.
 *
 * @param html - markup handed to `cheerio.load` (presumably an HTML string — confirm).
 */
function scrape(html) {
var $ = cheerio.load(html);
// Every result row on the page.
var jobs = $("div.result");
jobs.each(function (i, job) {
// Re-wrap the raw element so the cheerio traversal methods are available.
job = $(job);
var href = job.find("div.title a").attr('href');
// NOTE(review): `.text` is referenced here but not called, unlike the
// `.text()` calls on the following lines — jobTitle ends up holding the
// method itself rather than the title string; likely should be `.text()`.
var jobTitle = job.find("div.title a").text
var jobLocation = job.find("li.location").text();
var jobStatus = job.find("li.status").text();
// NOTE(review): the selector is an empty string — the company selector looks
// like it is missing; confirm the intended value.
var jobCompany = job.find("").text();
var jobPostedDate = job.find("li.posted").text();
// String form of the current time, stored on the record as `scrapedAt`.
var now = new Date();
now = now.toString();
var d = {
link: href,
title: jobTitle,
location: jobLocation,
status: jobStatus,
company: jobCompany,
posted: jobPostedDate,
scrapedAt: now
// NOTE(review): the object literal above is not closed before this `if`;
// lines appear to be missing here.
if (href) {
// NOTE(review): the selector starts with a space and names only `a` — it looks
// like part of the selector was lost; confirm.
var nextPage = $(" a").attr('href');
if (nextPage) {
// The empty-string prefix only coerces nextPage to a string; presumably a base
// URL belonged here — confirm.
var nextUrl = ''+ nextPage;
// Requests the next page. Only a rejection handler (which logs the error) is
// visible; NOTE(review): no fulfillment handler is attached in the visible code.
rp( nextUrl )
.catch(function (err) {console.log(err)});
} else {
// No next-page link: format the accumulated `data` with d3.csvFormat.
// NOTE(review): `csv` is not used by any visible line.
var csv = d3.csvFormat(data);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment