Skip to content

Instantly share code, notes, and snippets.

@aocenas
Created November 19, 2012 14:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aocenas/4110999 to your computer and use it in GitHub Desktop.
Save aocenas/4110999 to your computer and use it in GitHub Desktop.
Code to scrape ranking data from foosball.sk.
var fs = require('fs');
var request = require('request');
var jsdom = require('jsdom');
var iconv = require('iconv-lite');
// URL of the ranking page that is scraped.
var rankOpen =
  'http://www.foosball.sk/sk/zebricky/?action=view&rank_name_id=513';

// Aggregated scrape result; serialized to data.json by delayedDataWrite.
var data = {};
data.players = [];
// Maps a player's detail-page link to that player's index in data.players.
// Created without a prototype so arbitrary link strings are safe as keys.
data.indexes = Object.create(null);

// Number of per-row callbacks still outstanding; data is written once it is 0.
var callbacks = 0;

request(
  {
    uri: rankOpen,
    // Request the raw bytes (a Buffer) instead of a UTF-8 decoded string:
    // mapPlayers decodes the body from windows-1250 itself, which only works
    // on the undecoded bytes.
    encoding: null
  },
  wrap(mapPlayers, delayedDataWrite)
);
// wrap any function as a request callback
/**
 * Builds a `request`-style callback `(err, response, body)` around `fn`.
 *
 * The first argument is the function to call; any further arguments are
 * forwarded to it after the response body, i.e. `fn(body, extra1, extra2...)`.
 *
 * If the request failed (an error was reported, no response was received, or
 * the status code is not 200) the failure is logged and `fn` is not called.
 *
 * @returns {Function} callback suitable for passing to `request`.
 */
function wrap () {
  var fn = arguments[0];
  var extraArgs = Array.prototype.slice.call(arguments, 1);
  return function (err, response, body) {
    // `response` is undefined when `err` is set, so check it before use.
    if (err || !response || response.statusCode !== 200) {
      console.log('Error when contacting foosball.sk');
      return;
    }
    console.log(response.headers);
    // Build a fresh argument list on every call so that repeated invocations
    // do not accumulate earlier bodies in the shared closure array.
    fn.apply(null, [body].concat(extraArgs));
  };
}
/**
 * Repairs table cells that are opened with `<td>` but closed with `</th>`.
 *
 * Only a `</th>` that is the first closing cell tag after a `<td ...>` opening
 * tag is rewritten to `</td>`; well-formed `<td>...</td>` and `<th>...</th>`
 * cells are left untouched.
 *
 * @param {string} html - markup to repair.
 * @returns {string} the repaired markup.
 */
function sanitize (html) {
  // $1 captures the opening <td ...> tag plus the cell content up to (but not
  // including) the first closing </td> or </th>; the match only succeeds when
  // that closing tag is </th>. [\s\S] lets a cell span several lines.
  return html.replace(
    /(<td\b[^>]*>(?:(?!<\/t[dh]>)[\s\S])*)<\/th>/gi,
    '$1</td>'
  );
}
/**
 * Parses the ranking page and fills `data.players` / `data.indexes`.
 *
 * The body is decoded from windows-1250, dumped to index.html, repaired with
 * `sanitize` and loaded into jsdom with jQuery. Every table row except the
 * first one becomes a player record `{rank, name, tournaments, points}` read
 * from cells 0, 1, 3 and 8 respectively.
 *
 * @param {*} body - response body of the ranking page, passed to iconv.decode.
 * @param {Function} callback - invoked once per processed player row.
 */
function mapPlayers(body, callback) {
  var decoded = iconv.decode(body, 'windows1250');
  fs.writeFileSync('index.html', decoded);
  // sanitize the <td></th> malformed combinations found in the page
  decoded = sanitize(decoded);
  jsdom.env(
    {
      html: decoded,
      scripts: [
        '//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js'
      ]
    },
    function (err, window) {
      // Without a window (or without jQuery loaded on it) nothing can be parsed.
      if (!window || !window.jQuery) {
        console.log('Error when parsing the ranking page', err);
        return;
      }
      var $ = window.jQuery;
      var rows = $('div.content table tr');
      // One pending callback per row after the first (index 0 is skipped).
      callbacks = rows.length - 1;
      rows.each(function (index, element) {
        if (index === 0) {
          return;
        }
        console.log(
          index + ' ' + $($(element).children().get(1)).text().trim()
        );
        var row = $(element).find('td');
        var link = $(row.get(1)).find('a').attr('href');
        console.log(link);
        var player = {
          rank: +$(row.get(0)).text().trim(),
          name: $(row.get(1)).text().trim(),
          tournaments: +$(row.get(3)).text().trim(),
          points: +$(row.get(8)).text().trim()
        };
        // remember where this player lives so that per-player tournament
        // results can later be attached by link
        data.indexes[link] = data.players.push(player) - 1;
        // The per-player request (which would finish via mapResults and
        // decrement the counter there) is disabled, so this row is complete
        // right now. Count it down here; otherwise the counter never reaches 0
        // and the data file is never written.
        // NOTE(review): if the per-player requests are re-enabled, replace the
        // two lines below with
        //   request(link, wrap(mapResults, link, callback));
        callbacks--;
        callback();
      });
    }
  );
}
/**
 * Parses one player's results page and attaches the tournament list to the
 * player registered under `link` in `data.indexes`.
 *
 * Every table row except the first one becomes a record
 * `{name, discipline, date, place, points}`. The list replaces the player's
 * `tournaments` property. The pending-callback counter is decremented and
 * `callback` is invoked whether or not parsing succeeded, so that one bad page
 * cannot block the final write.
 *
 * @param {string} body - HTML of the player's results page.
 * @param {string} link - key under which the player was stored in data.indexes.
 * @param {Function} callback - invoked once this player has been handled.
 */
function mapResults (body, link, callback) {
  jsdom.env(
    {
      html: sanitize(body),
      scripts: [
        '//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js'
      ]
    },
    function (err, window) {
      if (!window || !window.jQuery) {
        console.log('Error when parsing the results page ' + link, err);
        callbacks--;
        callback();
        return;
      }
      var $ = window.jQuery;
      var tournaments = [];
      $('div.content table tr').each(function (index, element) {
        // index 0 is skipped, as in the ranking table
        if (index === 0) {
          return;
        }
        var row = $(element).find('td');
        var firstCell = $(row.get(0));
        tournaments.push({
          name: firstCell.find('a').text(),
          discipline: firstCell.find('span').text(),
          // NOTE(review): Date.parse on a non-ISO string is implementation
          // dependent - confirm the date format used by the page.
          date: Date.parse($(row.get(1)).text()),
          place: +$(row.get(2)).text(),
          points: +$(row.get(3)).text()
        });
      });
      var player = data.players[data.indexes[link]];
      if (player) {
        player.tournaments = tournaments;
      } else {
        console.log('No player registered for ' + link);
      }
      callbacks--;
      callback();
    }
  );
}
/**
 * Writes the collected `data` to data.json once no callbacks are outstanding.
 * While the `callbacks` counter is non-zero the call is a no-op.
 */
function delayedDataWrite () {
  // Work is still pending: nothing to flush yet.
  if (callbacks != 0) {
    return;
  }
  var serialized = JSON.stringify(data, null, ' ');
  fs.writeFileSync('data.json', serialized);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment