Skip to content

Instantly share code, notes, and snippets.

@lathropd
Last active April 4, 2018 17:33
Show Gist options
  • Save lathropd/8b773b863457fe2192076f8121d7dc9f to your computer and use it in GitHub Desktop.
Save lathropd/8b773b863457fe2192076f8121d7dc9f to your computer and use it in GitHub Desktop.
Single Page div-based scraper created by lathropd - https://repl.it/@lathropd/Single-Page-div-based-scraper
let request = require('request')
let rp = require('request-promise-native');
let cheerio = require('cheerio');
let d3 = require('d3');
let fs = require('fs');
let sleep = require('thread-sleep');
// Listings page that gets downloaded and scraped.
const mainUrl = "http://www.journalismjobs.com/job-listings";
// Collected job records; scrape() appends to this array.
const data = [];

// Download the page and pass its HTML to scrape(). A bare `.catch()` with no
// handler does not handle anything, so failures (network errors, or an
// exception thrown inside scrape) are reported explicitly here instead.
rp(mainUrl)
  .then(scrape)
  .catch((err) => {
    console.error(`Failed to scrape ${mainUrl}:`, err);
    // Signal failure to the caller without aborting pending output.
    process.exitCode = 1;
  });
/**
 * Extract job listings from the listings page and print them as CSV.
 *
 * Every `div.result` element that has a title link with an `href` becomes one
 * record. Records are appended to the module-level `data` array, after which
 * the whole array is formatted with `d3.csvFormat` and written to stdout.
 *
 * @param {string} html - Page markup, handed directly to `cheerio.load`.
 * @returns {void}
 */
function scrape(html) {
  const $ = cheerio.load(html);
  // Computed once so every row produced by this call carries the same
  // timestamp, rather than rebuilding the Date for each listing.
  const scrapedAt = new Date().toString();

  $("div.result").each((i, element) => {
    const job = $(element);
    const titleLink = job.find("div.title a");
    const href = titleLink.attr('href');

    // Results without a title link href are skipped; returning undefined
    // (not false) lets cheerio continue with the next element.
    if (!href) {
      return;
    }

    // Key order is preserved because d3.csvFormat derives the CSV columns
    // from it.
    data.push({
      link: href,
      title: titleLink.text(),
      location: job.find("li.location").text(),
      status: job.find("li.status").text(),
      company: job.find("div.company").text(),
      posted: job.find("li.posted").text(),
      scrapedAt: scrapedAt
    });
  });

  const csv = d3.csvFormat(data);
  console.log(csv);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment