lathropd/paged results single page scraper.js

## paged results single page scraper.js
let request = require('request')
let rp = require('request-promise-native');
let cheerio = require('cheerio');
let d3 = require('d3');
let fs = require('fs');
let sleep = require('thread-sleep');

// URL of the first results page; later pages are discovered by scrape().
var mainUrl = "http://www.journalismjobs.com/job-listings";

// Accumulates one record per job listing across every page scraped.
var data = [];

// Fetch the first page and hand its HTML to scrape(). A failure here is
// logged (same style as the per-page handler in scrape) instead of being
// left as an unhandled promise rejection, which is what a bare `.catch()`
// with no handler produces.
rp(mainUrl)
  .then(scrape)
  .catch(function (err) {
    console.log(err);
  });

/**
 * Parses one page of job listings and appends every listing that has a title
 * link to the module-level `data` array. If the page has a "next" link, the
 * next page is requested and fed back into `scrape`; otherwise everything
 * collected so far is printed as CSV.
 *
 * A failure while fetching or parsing a later page is logged and ends the
 * crawl; in that case no CSV is printed.
 *
 * @param {string} html - Markup of a single results page.
 * @returns {Promise|undefined} A promise that settles once the remaining
 *   pages have been processed, or undefined on the last page.
 */
function scrape(html) {
  const $ = cheerio.load(html);

  // One timestamp per page: it is identical for every row of the page at the
  // one-second resolution of Date#toString, so compute it once.
  const scrapedAt = new Date().toString();

  $("div.result").each(function (i, element) {
    const job = $(element);
    const titleLink = job.find("div.title a");
    const href = titleLink.attr('href');

    // Result containers without a title link are not recorded.
    if (!href) {
      return;
    }

    data.push({
      link: href,
      title: titleLink.text(),
      location: job.find("li.location").text(),
      status: job.find("li.status").text(),
      company: job.find("div.company").text(),
      posted: job.find("li.posted").text(),
      scrapedAt: scrapedAt
    });
  });

  const nextPage = $("li.next a").attr('href');

  // Last page: emit everything collected so far.
  if (!nextPage) {
    console.log(d3.csvFormat(data));
    return undefined;
  }

  // Progress output: rows collected so far, then the page about to be fetched.
  console.log(data.length);

  // Resolve against the site origin so root-relative, path-relative and
  // absolute hrefs all produce a valid URL (plain concatenation only works
  // for root-relative ones).
  const nextUrl = new URL(nextPage, 'http://www.journalismjobs.com').href;
  console.log(nextUrl);

  // Blocking pause from thread-sleep before the next request
  // (1000 is presumably milliseconds -- confirm against the package docs).
  sleep(1000);

  // Return the chain so the caller's promise settles only after the whole
  // crawl has finished, rather than leaving this request floating.
  return rp(nextUrl)
    .then(scrape)
    .catch(function (err) {
      console.log(err);
    });
}
	let request = require('request')
	let rp = require('request-promise-native');
	let cheerio = require('cheerio');
	let d3 = require('d3');
	let fs = require('fs');
	let sleep = require('thread-sleep');

	// Starting point of the crawl: the first page of job listings.
	var mainUrl = "http://www.journalismjobs.com/job-listings";

	// Shared result list; scrape() pushes one object per listing into it.
	var data = [];

	// Kick off the crawl. Calling `.catch()` without a handler does not handle
	// anything, so a failed first request would surface as an unhandled
	// rejection; report it on the console instead.
	rp(mainUrl)
		.then(scrape)
		.catch((err) => {
			console.log(err);
		});

	/**
	 * Extracts the job listings from one results page into the module-level
	 * `data` array, then follows the "next" link if the page has one. When
	 * there is no next link, all collected records are written to the console
	 * as CSV.
	 *
	 * Errors raised while fetching or handling a following page are logged and
	 * stop the crawl without printing the CSV.
	 *
	 * @param {string} html - HTML source of a results page.
	 * @returns {Promise|undefined} Promise covering the rest of the crawl when
	 *   another page exists; undefined once the final page has been handled.
	 */
	function scrape(html) {
		const $ = cheerio.load(html);

		// Date#toString has one-second resolution, so a single per-page value
		// is shared by all rows instead of being rebuilt for each one.
		const scrapedAt = new Date().toString();

		$("div.result").each((index, node) => {
			const job = $(node);

			// Look the title anchor up once; it supplies both link and title.
			const anchor = job.find("div.title a");
			const link = anchor.attr('href');

			// Only results that actually carry a link are kept.
			if (link) {
				data.push({
					link: link,
					title: anchor.text(),
					location: job.find("li.location").text(),
					status: job.find("li.status").text(),
					company: job.find("div.company").text(),
					posted: job.find("li.posted").text(),
					scrapedAt: scrapedAt
				});
			}
		});

		const nextPage = $("li.next a").attr('href');

		if (nextPage) {
			// Running total of collected rows, for progress feedback.
			console.log(data.length);

			// URL resolution copes with absolute and relative hrefs alike,
			// which naive string concatenation with the origin does not.
			const nextUrl = new URL(nextPage, 'http://www.journalismjobs.com').href;
			console.log(nextUrl);

			// Blocking delay from thread-sleep between page requests
			// (argument presumably in milliseconds -- TODO confirm).
			sleep(1000);

			// Returned so the caller's chain waits for the remaining pages.
			return rp(nextUrl)
				.then(scrape)
				.catch((err) => {
					console.log(err);
				});
		}

		// No further pages: dump every collected record as CSV.
		const csv = d3.csvFormat(data);
		console.log(csv);
		return undefined;
	}