Skip to content

Instantly share code, notes, and snippets.

@eknowles
Created November 23, 2015 10:40
Show Gist options
  • Save eknowles/f5da0f90d784a1603a81 to your computer and use it in GitHub Desktop.
Save eknowles/f5da0f90d784a1603a81 to your computer and use it in GitHub Desktop.
var request = require('request');
var cheerio = require('cheerio');
var csv = require('ya-csv');
var fs = require('fs');
// npm i ya-csv cheerio request
// BBC management biographies index page; the scrape starts here.
// Declared with `var` so the script does not create an implicit global
// (which would throw a ReferenceError under strict mode / ES modules).
var url = 'http://www.bbc.co.uk/corporate2/insidethebbc/managementstructure/biographies';
// Rows are streamed to salaries.csv in the current working directory.
var writer = csv.createCsvStreamWriter(fs.createWriteStream('salaries.csv'));
// Header row; every later record must keep this column order.
writer.writeRecord(['name', 'title', 'salary', 'remuneration', 'link']);
// Origin prepended to site-relative biography links found on the index page.
var BBC_ORIGIN = 'http://www.bbc.co.uk';
// Each pattern captures the pound amount including its thousands separators,
// e.g. "150,000" or "1,200,000".
var SALARY_RE = /Salary: £([0-9]+(?:,[0-9]+)*)/;
var REMUNERATION_RE = /Total remuneration: £([0-9]+(?:,[0-9]+)*)/;

/**
 * Extracts a whole-pound amount from a piece of text.
 *
 * @param {*} text - Candidate text; non-string values (e.g. the undefined
 *   `data` of a non-text node) never match.
 * @param {RegExp} pattern - Non-global pattern whose first capture group is
 *   the amount with comma thousands separators.
 * @returns {?number} The amount as an integer, or null when `text` does not match.
 */
function parseAmount(text, pattern) {
  if (typeof text !== 'string') {
    return null;
  }
  var match = pattern.exec(text);
  if (!match) {
    return null;
  }
  // Read the capture group rather than rewriting the whole string, so text
  // surrounding the amount cannot turn the result into NaN.
  return parseInt(match[1].replace(/,/g, ''), 10);
}

/**
 * Turns an href from the index page into an absolute URL.
 *
 * @param {string} href - Link target as found in the markup.
 * @returns {string} `href` unchanged when it already carries an http(s)
 *   scheme, otherwise `href` prefixed with the BBC origin.
 */
function toAbsoluteLink(href) {
  if (/^https?:\/\//i.test(href)) {
    return href;
  }
  return BBC_ORIGIN + href;
}

// Step 1: fetch the index page and collect one record per listed person.
// Step 2: fetch each person's biography page, read salary and total
// remuneration from it, and append a row to the CSV writer.
request(url, function (error, response, html) {
  if (error) {
    console.error('Could not fetch biography index ' + url + ': ' + error.message);
    return;
  }

  var $ = cheerio.load(html);
  var people = [];

  $('li', '.text').each(function () {
    var item = $(this);
    var text = item.text();
    // Entries read "<name>, <title>"; everything before the first comma is the name.
    var name = text.split(',')[0];
    var href = item.find('a').attr('href');

    // Hard-coded override kept from the original script; presumably the index
    // page's own link for this entry is missing or wrong (TODO confirm).
    if (name === 'John Shield') {
      href = 'http://www.bbc.co.uk/aboutthebbc/insidethebbc/managementstructure/biographies/shield_john.html';
    }

    // A list item without an anchor has nothing to scrape; skip it instead of
    // crashing on an undefined href.
    if (!href) {
      console.warn('Skipping entry without a biography link: ' + text);
      return;
    }

    people.push({
      name: name,
      title: text.replace(name + ', ', ''),
      salary: 0,
      remuneration: 0,
      link: toAbsoluteLink(href)
    });
  });

  people.forEach(function (person) {
    request(person.link, function (error, response, html) {
      if (error) {
        console.error('Could not fetch ' + person.link + ': ' + error.message);
        return;
      }

      var $ = cheerio.load(html);
      // The figures are looked up in the child nodes of the element that
      // immediately follows the "salary and total remuneration" heading.
      var nodes = $('#heading-salary-and-total-remuneration').next().children().contents();

      nodes.each(function (i, node) {
        var salary = parseAmount(node.data, SALARY_RE);
        if (salary !== null) {
          person.salary = salary;
        }
        var remuneration = parseAmount(node.data, REMUNERATION_RE);
        if (remuneration !== null) {
          person.remuneration = remuneration;
        }
      });

      // Rows are written as responses arrive, so CSV order follows response
      // order rather than index-page order. Figures stay 0 when not found.
      writer.writeRecord([person.name, person.title, person.salary, person.remuneration, person.link]);
    });
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment