Skip to content

Instantly share code, notes, and snippets.

@lathropd
Created April 3, 2018 15:06
Show Gist options
  • Save lathropd/3435584f8cbc896d054aeacb9f1cb63e to your computer and use it in GitHub Desktop.
Save lathropd/3435584f8cbc896d054aeacb9f1cb63e to your computer and use it in GitHub Desktop.
Single Page table scraper created by lathropd - https://repl.it/@lathropd/Single-Page-table-scraper
let request = require('request')
let rp = require('request-promise-native');
let cheerio = require('cheerio');
let d3 = require('d3');
let fs = require('fs');
// Page holding the league-wide payroll table that scrape() parses.
const mainUrl = "http://www.spotrac.com/mlb/payroll";
// Module-level accumulators; nothing in this part of the script fills them yet.
let teamList = [];
let playerSalaries = [];

// Fetch the payroll page and hand the response body to scrape().
// The rejection handler covers both a failed request and an exception thrown
// inside scrape(); a bare `.catch()` with no handler would let either one
// surface as an unhandled promise rejection.
rp(mainUrl)
  .then(scrape)
  .catch(function (err) {
    console.error("Failed to scrape " + mainUrl + ":", err);
    // Signal failure to the calling shell without cutting off pending output.
    process.exitCode = 1;
  });
/**
 * Parses the payroll page, prints its first table as CSV and returns the
 * per-team links found in it.
 *
 * Only the first <table> in the document is read. Its first <tr> is skipped
 * as the header row. Every remaining row that contains a `.player a` link
 * becomes one record; rows without such a link are ignored.
 *
 * @param {string} html - Markup handed to cheerio.load().
 * @returns {string[]} The href of each kept row, in table order.
 */
function scrape(html) {
  const $ = cheerio.load(html);

  // Record keys, listed in the order their <td> cells appear within a row.
  const columns = [
    "rank",
    "team",
    "roster",
    "twentyFiveMan",
    "disabledList",
    "retained",
    "buried",
    "suspended"
  ];

  const data = [];

  $("table")
    .first()
    .find("tr")
    .slice(1) // drop the header row
    .each(function (i, element) {
      const row = $(element);
      const href = row.find(".player a").attr('href');

      // Rows with no link are not kept, so skip them before reading any cells.
      if (!href) {
        return;
      }

      const cells = row.find("td");
      // `link` is assigned first so it stays the first CSV column.
      const record = { link: href };
      columns.forEach(function (key, index) {
        record[key] = $(cells[index]).text();
      });
      data.push(record);
    });

  const csv = d3.csvFormat(data);
  console.log(csv);

  // Return the links instead of computing and discarding them, so a caller
  // can chain further requests off this result.
  return data.map(function (team) {
    return team.link;
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment