Created
March 16, 2014 02:34
-
-
Save shadedyin/9577747 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http'); | |
var cheerio = require('cheerio'); | |
var numeral = require('numeral')(); | |
var rsvp = require('rsvp'); | |
var _ = require('underscore'); | |
var Model = require('./model'); | |
/**
 * Polling scraper. Fetches `opts.url` over HTTP, hands the cheerio-parsed
 * document to `onHTML`, passes whatever `onHTML` returns to `onJSON`, and then
 * schedules the next poll `timeout` milliseconds later. Polling starts as soon
 * as the instance is constructed.
 *
 * @constructor
 * @param {Object} opts - Options; every property is copied onto the instance.
 * @param {string} opts.url - Page to fetch (required).
 * @param {number} [opts.timeout=60000] - Delay between polls, in milliseconds.
 * @param {Function} [opts.onHTML] - Receives the cheerio document; its return
 *   value is forwarded to `onJSON`.
 * @param {Function} [opts.onJSON] - Receives the value returned by `onHTML`.
 * @throws {Error} When no `url` option is supplied.
 */
var Scraper = function(opts) {
  var self = this;
  var options = opts || { };

  // No-op defaults so a caller may provide only one of the two hooks.
  self.onHTML = function($) { };
  self.onJSON = function(json) { };
  _.extend(self, options);

  // Validate the instance's own url rather than whatever `url` variable
  // happens to be visible in an enclosing scope.
  if (!self.url) {
    throw new Error('Scraper: Cannot be created with out a "url".');
  }
  self.timeout = options.timeout || 60000;

  // Fetches `url` and resolves with the result of `self.onHTML`.
  var crawl = function(url) {
    var deferred = rsvp.defer();
    var html = '';

    http.get(url, function(res) {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the body so the socket is released, then fail this poll
        // instead of parsing an error page as if it were results.
        res.resume();
        deferred.reject(new Error(
          'Scraper: "' + url + '" responded with HTTP ' + res.statusCode + '.'
        ));
        return;
      }

      // Decode as text up front so a multi-byte character split across two
      // chunks is not corrupted by per-chunk Buffer-to-string conversion.
      res.setEncoding('utf8');
      res.on('data', function(chunk) {
        html += chunk;
      }).on('end', function() {
        try {
          deferred.resolve(self.onHTML(cheerio.load(html)));
        } catch (error) {
          // A throwing hook becomes a rejected poll, not an uncaught exception.
          deferred.reject(error);
        }
      }).on('error', function(error) {
        deferred.reject(error);
      });
    }).on('error', function(error) {
      deferred.reject(error);
    });

    return deferred.promise;
  };

  // One poll cycle. A failed cycle is logged and the next one is still
  // scheduled, so a transient error does not silently end polling.
  var run = function() {
    crawl(self.url).then(function(json) {
      self.onJSON(json);
    }).then(null, function(error) {
      console.error('Scraper: poll of "' + self.url + '" failed:', error);
    }).then(function() {
      setTimeout(run, self.timeout);
    });
  };

  run();
};
// Results page polled by the scraper below.
var url =
  'http://staticresults.sos.la.gov/03152014/03152014_36_50211_Precinct.html';

/**
 * Scraper instance for the 'sheriff' contest.
 *
 * `onHTML` walks the rows of the page's table and builds
 * `{ ward: {...}, results: {...} }`, recording each tally on the precinct and
 * adding it to the ward-level and overall totals. `onJSON` writes that
 * structure out through `Model.ref`.
 */
var scraper = new Scraper({
  url: url,
  key: 'sheriff',
  runners: [ 'foti', 'gussman' ],

  // Selects the table rows of the document.
  rows: function($) { return $('table tr'); },

  // Selects the cells of one row.
  cols: function($, row_el) { return $(row_el).children(); },

  /**
   * Converts the parsed page into the nested results structure.
   *
   * @param {Function} $ - cheerio document.
   * @returns {Object} `{ ward, results }` keyed as described above.
   */
  onHTML: function($) {
    var self = this;
    var json = { ward: { }, results: { } };
    var rows = self.rows($);
    var lastRow = rows.length - 1;

    // Returns holder.results[self.key], creating either level when missing.
    var bucketFor = function(holder) {
      if (!holder.results) {
        holder.results = { };
      }
      if (!holder.results[self.key]) {
        holder.results[self.key] = { };
      }
      return holder.results[self.key];
    };

    rows.each(function(i, row_el) {
      // The first and last rows carry no precinct data and are skipped
      // (presumably a header and a totals row - confirm against the page).
      if (i === 0 || i === lastRow) { return; }

      var ward = null;
      var precinct = null;

      self.cols($, row_el).each(function(j, col_el) {
        var text = $(col_el).text();

        if (j === 0) {
          // First cell is "<ward> <precinct>"; trim and split on any run of
          // whitespace so padded cell text does not yield empty ids.
          var parts = text.trim().split(/\s+/);
          ward = Model.getWard(json, 'W' + parts[0]);
          precinct = Model.getPrecinct(ward, 'P' + parts[1]);
          return;
        }

        // NOTE(review): the modulus 4 is inherited unchanged; confirm it
        // matches the table layout. Cells that map to no configured runner
        // are skipped instead of being tallied under the key "undefined".
        var runner = self.runners[(j - 1) % 4];
        if (runner === undefined) { return; }

        var value = numeral.unformat(text);
        Model.set(bucketFor(precinct), runner, value);
        Model.inc(bucketFor(ward), runner, value);
        Model.inc(bucketFor(json), runner, value);
      });
    });

    return json;
  },

  /**
   * Writes the structure produced by `onHTML` through `Model.ref`: the
   * overall results first, then each ward's results per election, then each
   * of that ward's precinct results for the same election.
   *
   * @param {Object} json - Value returned by `onHTML`.
   */
  onJSON: function(json) {
    Model.ref.child('results').set(json.results);

    Object.keys(json.ward).forEach(function(wardId) {
      var wardData = json.ward[wardId];
      var wardRef = Model.ref.child('ward').child(wardId);

      Object.keys(wardData.results || { }).forEach(function(election) {
        wardRef.child('results').child(election)
          .set(wardData.results[election]);

        Object.keys(wardData.precinct || { }).forEach(function(precinctId) {
          var tallies = wardData.precinct[precinctId].results;
          // A precinct with no tallies for this election has nothing to write.
          if (!tallies || tallies[election] === undefined) { return; }

          wardRef.child('precinct').child(precinctId)
            .child('results').child(election)
            .set(tallies[election]);
        });
      });
    });
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment