Created
March 27, 2012 18:46
-
-
Save piccolbo/2219047 to your computer and use it in GitHub Desktop.
Sketch of rmr vector API, answer to the devil
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# predicate, group, select and aggregate are user-defined functions.
# It is assumed that a vectorized version of each is used where needed.
# Pass-through: the map function re-emits each input key/value pair unchanged.
mapreduce(
  input,
  map = function(k, v) keyval(k, v)
)

# Pass-through, vectorized version.
# vectorized.map says how many records to process in one map call (default 1).
# NOTE(review): vec = TRUE presumably tells keyval that k and v each hold a
# batch of records rather than a single one -- confirm against the rmr API.
mapreduce(
  input,
  map = function(k, v) keyval(k, v, vec = TRUE),
  vectorized.map = 1000
)
# Filter: keep only the records for which the user-defined predicate holds.
# When predicate(k, v) is FALSE the `if` has no else branch, so the map
# function returns NULL instead of a keyval.
mapreduce(
  input,
  map = function(k, v) if (predicate(k, v)) keyval(k, v)
)

# Filter, vectorized version.
# predicate(k, v) is expected to return a logical mask with one entry per
# record; the same mask subsets both keys and values.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    keyval(k[keep], v[keep], vec = TRUE)
  },
  vectorized.map = 1000
)

# Filter, vectorized version, structured case.
# structured says to convert list to data frame. Fails if not possible.
# If TRUE, it applies to both map and reduce; it can also be a named vector
# or a list such as list(map = TRUE, reduce = FALSE). Default: FALSE for both.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    # drop = FALSE keeps k and v as data frames even when they have a single
    # column; plain k[keep, ] would collapse such a frame to a bare vector.
    keyval(k[keep, , drop = FALSE], v[keep, , drop = FALSE], vec = TRUE)
  },
  vectorized.map = 1000,
  structured = TRUE
)
# Select: keep the key and replace each value with the user-defined
# projection select(v).
mapreduce(
  input,
  map = function(k, v) keyval(k, select(v))
)

# Select, vectorized version: select() must accept a batch of values.
mapreduce(
  input,
  map = function(k, v) keyval(k, select(v), vec = TRUE),
  vectorized.map = 1000
)

# Select, vectorized version, structured case.
# With structured = TRUE the values arrive as a data frame, so the projection
# is plain column indexing. `fields` is a free variable naming the columns to
# keep; it must be defined by the caller.
mapreduce(
  input,
  # drop = FALSE keeps the result a data frame even when `fields` names a
  # single column.
  map = function(k, v) keyval(k, v[, fields, drop = FALSE], vec = TRUE),
  vectorized.map = 1000,
  structured = TRUE
)
# Big sum: re-key each record with the user-defined group(k, v), then sum the
# values collected for each group. In the unstructured case the reducer gets
# the values as a list (vv), hence the unlist() before sum().
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v),
  reduce = function(k, vv) keyval(k, sum(unlist(vv)))
)

# Big sum, vectorized version: group() must return one key per record.
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v, vec = TRUE),
  reduce = function(k, vv) keyval(k, sum(unlist(vv))),
  vectorized.map = 1000
)

# Big sum, vectorized version, structured case.
# With structured = TRUE the reducer input is no longer a list, so sum() is
# applied to vv directly, without unlist().
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v, vec = TRUE),
  reduce = function(k, vv) keyval(k, sum(vv)),
  vectorized.map = 1000,
  structured = TRUE
)
# or structured = c(TRUE, TRUE) or list(map = TRUE, reduce = TRUE)
# Embarrassingly parallel: re-key each record with the user-defined
# group(k, v) and apply the user-defined aggregate() to each group's values
# independently.
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v),
  reduce = function(k, vv) keyval(k, aggregate(vv))
)

# Embarrassingly parallel, vectorized version.
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v, vec = TRUE),
  reduce = function(k, vv) keyval(k, aggregate(vv)),
  vectorized.map = 1000
)

# Embarrassingly parallel, vectorized version, structured case.
# some.function stands for any user-defined function of the grouped values.
# NOTE(review): the original sketch omitted the comma after the reduce
# argument (a syntax error), left out vec = TRUE, and passed
# vectorized.map = TRUE although that argument is documented as a record
# count. All three are aligned here with the other vectorized examples;
# confirm that this matches the intended API.
mapreduce(
  input,
  map = function(k, v) keyval(group(k, v), v, vec = TRUE),
  reduce = function(k, vv) keyval(k, some.function(vv)),
  vectorized.map = 1000,
  structured = TRUE
)
Soon, probably after the next check-in (chasing down some bugs right now). It's in a separate branch right now.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello Antonio,
and thank you for your answer.
When do you think it will be possible to try rmr version 1.3?
I would like to use the vectorization features as soon as possible.
Thanks,
Bastien