Last active
April 29, 2019 18:12
-
-
Save bburnett071/3c9939d95817e6d7260c to your computer and use it in GitHub Desktop.
ES6 NodeJS script to process world cup data for group tournament scoring analysis. Blog: http://www.thebhwgroup.com/blog/2014/09/bi-nodejs-couchdb-mapreduce
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict;"; | |
var co = require('co'); | |
var u = require('underscore'); | |
var fs = require('co-fs'); | |
var printf = require('printf'); | |
var moment = require('moment'); | |
var forEach = require('co-foreach'); | |
var nano = require('nano')('http://localhost:5984'); | |
var couch = require('co-nano')(nano); | |
co(function * () { | |
var re = /(\w{3})\/(\d{1,2})\s+\d+:\d+\s+(\w+)\s+(\d+)-(\d+)\s+(.*)\s+\@/g; | |
var result = []; | |
//read the finals | |
var file = yield fs.readFile('./final.txt', 'utf8'); | |
while( (matches = re.exec(file)) !== null ){ | |
result.push({ | |
date: moment(printf("%s %d, 2014", matches[1], matches[2])).toDate(), | |
home: matches[3].trim(), | |
away: matches[6].trim(), | |
home_score: parseInt(matches[4]), | |
away_score: parseInt(matches[5]), | |
finish: 3 | |
}); | |
} | |
//read the cup | |
file = yield fs.readFile('./cup.txt', 'utf8'); | |
while( (matches = re.exec(file)) !== null ){ | |
result.push({ | |
date: moment(printf("%s %d, 2014", matches[1], matches[2])).toDate(), | |
home: matches[3].trim(), | |
away: matches[6].trim(), | |
home_score: parseInt(matches[4]), | |
away_score: parseInt(matches[5]), | |
finish: 2 | |
}); | |
} | |
//read an entire directory of qualifiers... | |
re = /(\w{3})\/(\d{1,2})\s+(\d{4})\s+(\w+)\s+(\d+)\-(\d+)\s+(.*)\s+\@/g; | |
var files = yield fs.readdir('./quals/quals/2014--brazil/quali/'); | |
for( var i = 0; i < files.length; i++ ){ | |
file = yield fs.readFile('./quals/quals/2014--brazil/quali/' + files[i], 'utf8'); | |
while( (matches = re.exec(file)) !== null ){ | |
result.push({ | |
date: moment(printf("%s %d, %d", matches[1], matches[2], matches[3])).toDate(), | |
home: matches[4].trim(), | |
away: matches[7].trim(), | |
home_score: parseInt(matches[5]), | |
away_score: parseInt(matches[6]), | |
finish: 1 | |
}); | |
} | |
} | |
//get a list of the unique teams, both home and away | |
var teams = u.sortBy(u.uniq(u.union(u.pluck(result, 'home'), u.pluck(result, 'away'))), function(s){ return s;}); | |
var final_results = []; | |
u.each(teams, function(team){ | |
//Map the home and away result to a strucutre we can aggregate | |
var team_result = u.map(result, function(game){ | |
if( game.home === team ){ | |
return { | |
team: team, | |
goals_for: game.home_score, | |
goals_against: game.away_score, | |
win: game.home_score > game.away_score, | |
loss: game.away_score > game.home_score, | |
draw: game.home_score === game.away_score, | |
finish: game.finish | |
}; | |
} else if( game.away === team){ | |
return { | |
team: team, | |
goals_for: game.away_score, | |
goals_against: game.home_score, | |
win: game.home_score < game.away_score, | |
loss: game.away_score < game.home_score, | |
draw: game.home_score === game.away_score, | |
finish: game.finish | |
}; | |
} else { | |
return null; | |
} | |
}); | |
var state = { | |
team: team, | |
goals_for: 0, | |
goals_against: 0, | |
win: 0, | |
loss: 0, | |
draw: 0, | |
points: 0, | |
goal_diff: 0, | |
games: 0, | |
finish: 0 | |
}; | |
//reduce the home and away results to tally the team's result | |
var team_final = u.reduce(team_result, function(memo, res){ | |
if( res === null ){ | |
return memo; | |
} else { | |
memo.goals_for += res.goals_for || 0; | |
memo.goals_against += res.goals_against || 0; | |
memo.win += res.win === true ? 1 : 0; | |
memo.loss += res.loss === true ? 1 : 0; | |
memo.draw += res.draw === true ? 1 : 0; | |
memo.points += Math.min(3, res.goals_for || 0) + (res.win === true ? 1 : 0 ) * 3 + (res.draw === true ? 1 : 0 ); | |
memo.goal_diff += (res.goals_for || 0) - (res.goals_against || 0 ); | |
memo.games += 1; | |
memo.finish = Math.max(memo.finish, res.finish); | |
return memo; | |
} | |
}, state); | |
//fix-up our finish enumeration | |
if( team_final.finish === 3 ){ | |
team_final.finish = 'Finals'; | |
} else if( team_final.finish === 2 ){ | |
team_final.finish = 'Cup'; | |
} else if ( team_final.finish === 1 ){ | |
team_final.finish = 'Qualifiers'; | |
} | |
final_results.push(team_final); | |
}); | |
//record our results in CouchDB | |
var db = couch.use('world_cup_2014'); | |
forEach( final_results, function* (fres){ | |
var x = yield db.insert(fres, fres.team); | |
}).then(function(){ | |
console.log('success'); | |
}, function(err){ | |
console.log(err); | |
}); | |
console.log(JSON.stringify(final_results, null, 2)); | |
})(); |
Great suggestions on lodash or native V8 map/reduce functions for performance. This jsperf seems to imply that lodash may even be faster than V8's native map/reduce implementation, http://jsperf.com/fast-vs-lodash/12. lodash would be a drop-in replacement here from underscore.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Interesting little program. You could have used the native ES5 functional JavaScript methods, map and reduce, or the Lodash library, which is a spinoff of Underscore geared towards speed and performance.
I might try to implement the same kind of script using PostgreSQL instead, as it supports JSON and will soon have JSONB in version 9.4.