Created
April 17, 2012 06:42
-
-
Save piccolbo/2403946 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# predicate, group, select and aggregate are user-defined functions.
# A vectorized version of each is assumed to be used wherever one is needed.
# Pass-through: the map step re-emits every key/value pair unchanged.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, v)
  }
)

# Pass-through, vectorized version.
# Original note: "vectorized.map says how many records to process in one map,
# default 1". The call below spells the option as vectorized = list(map = TRUE).
# NOTE(review): confirm which spelling/semantics the mapreduce API expects.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, v, vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)
# Filter: keep only the records for which the user-defined predicate(k, v)
# holds. The `if` has no else branch, so the map function evaluates to NULL
# for rejected records (presumably treated by mapreduce as "emit nothing").
mapreduce(
  input,
  map = function(k, v) {
    if (predicate(k, v)) keyval(k, v)
  }
)

# Filter, vectorized version: predicate() is applied to a batch of records
# and its result is used to subset keys and values in one step.
mapreduce(
  input,
  map = function(k, v) {
    # `keep` rather than `filter`, to avoid shadowing stats::filter.
    keep <- predicate(k, v)
    keyval(k[keep], v[keep], vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)

# Filter, vectorized version, structured case.
# `structured` asks for the list of records to be converted to a data frame;
# it fails if the conversion is not possible. A bare TRUE applies to both map
# and reduce; otherwise pass a named vector or list such as
# list(map = TRUE, reduce = FALSE). Default: FALSE for both.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    # drop = FALSE keeps one-column data frames as data frames; without it,
    # row-subsetting a single-column data frame collapses it to a vector.
    keyval(
      k[keep, , drop = FALSE],
      v[keep, , drop = FALSE],
      vectorized = TRUE
    )
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
# Select: transform each value with the user-defined select(), keys untouched.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, select(v))
  }
)

# Select, vectorized version: select() is applied to a whole batch of values.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, select(v), vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)

# Select, vectorized version, structured case: values are column-subset.
# `fields` is not defined in this file; presumably a user-supplied vector of
# column names or indices -- TODO confirm.
mapreduce(
  input,
  map = function(k, v) {
    # drop = FALSE so that selecting a single column still yields a data
    # frame instead of silently collapsing to a vector.
    keyval(k, v[, fields, drop = FALSE], vectorized = TRUE)
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
# Big sum: re-key each record with the user-defined group(k, v), then add up
# all values that share a key.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    # vv is flattened with unlist() before summing.
    keyval(k, sum(unlist(vv)))
  }
)

# Big sum, vectorized version: only the map side is vectorized.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, sum(unlist(vv)))
  },
  vectorized = list(map = TRUE)
)

# Big sum, vectorized version, structured case: the reducer calls sum()
# directly on vv, with no unlist().
# NOTE(review): sum(vv) presumably needs the reduce side to be structured as
# well -- see the alternative settings noted after the call; confirm.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, sum(vv))
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
# Alternatively: structured = c(TRUE, TRUE) or list(map = TRUE, reduce = TRUE)
# Embarrassingly parallel: re-key each record with the user-defined
# group(k, v), then process every group independently with the user-defined
# aggregate() (the function named in the file header, not stats::aggregate).
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, aggregate(vv))
  }
)

# Embarrassingly parallel, vectorized version: only the map side is vectorized.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, aggregate(vv))
  },
  vectorized = list(map = TRUE)
)

# Embarrassingly parallel, vectorized version, structured case.
# Fixes relative to the original draft of this example:
#   * the comma after the reduce argument was missing (parse error);
#   * `vectorized.map = TRUE` is now `vectorized = list(map = TRUE)`, the
#     spelling used by every other vectorized example in this file;
#   * keyval() now passes vectorized = TRUE, like the other vectorized maps.
# `some.function` is not defined in this file; presumably a user-supplied
# per-group function -- TODO confirm.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, some.function(vv))
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment