Skip to content

Instantly share code, notes, and snippets.

@piccolbo
Created April 17, 2012 06:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save piccolbo/2403946 to your computer and use it in GitHub Desktop.
Save piccolbo/2403946 to your computer and use it in GitHub Desktop.
# predicate, group, select and aggregate are user-defined functions.
# A vectorized version of each is assumed to be used where needed.
# Pass-through: the map function re-emits each key-value pair as is.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, v)
  }
)

# Pass-through, vectorized variant.
# Original note: vectorized.map says how many records to process in one map
# call (default 1). The call below spells the option as
# vectorized = list(map = TRUE).
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, v, vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)
# Filter: a pair is emitted only when predicate(k, v) is true.
mapreduce(
  input,
  map = function(k, v) {
    if (predicate(k, v)) keyval(k, v)
  }
)

# Filter, vectorized variant: predicate(k, v) yields the index used to subset
# both keys and values before they are emitted together.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    keyval(k[keep], v[keep], vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)
# Filter, vectorized and structured variant.
# structured asks for the list-to-data-frame conversion and fails when that
# conversion is not possible. TRUE covers both map and reduce; a named vector
# or a list such as list(map = TRUE, reduce = FALSE) sets them separately.
# Both default to FALSE.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    # drop = FALSE keeps the two-dimensional shape: without it, subsetting the
    # rows of a single-column data frame collapses the result to a plain
    # vector.
    keyval(
      k[keep, , drop = FALSE],
      v[keep, , drop = FALSE],
      vectorized = TRUE
    )
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
# Select: keys are kept, values are replaced by select(v).
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, select(v))
  }
)

# Select, vectorized variant.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, select(v), vectorized = TRUE)
  },
  vectorized = list(map = TRUE)
)
# Select, vectorized and structured variant: the selection is a column subset
# of the values.
# NOTE(review): fields is not defined in this sketch; presumably a vector of
# column names or indices supplied by the user -- confirm.
mapreduce(
  input,
  map = function(k, v) {
    # drop = FALSE keeps the result two-dimensional even when fields names a
    # single column; the default would return a bare vector in that case.
    keyval(k, v[, fields, drop = FALSE], vectorized = TRUE)
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
# Big sum: the map re-keys each record with group(k, v); the reduce flattens
# the values collected for a key and emits their sum.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    total <- sum(unlist(vv))
    keyval(k, total)
  }
)

# Big sum, vectorized variant (only the map side is vectorized).
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    total <- sum(unlist(vv))
    keyval(k, total)
  },
  vectorized = list(map = TRUE)
)
# Big sum, vectorized and structured variant.
# The reduce calls sum(vv) directly, without unlist(), so the values reaching
# it must already be in structured form. structured therefore enables the
# reduce side as well as the map side (the reduce side defaults to FALSE).
# structured = c(TRUE, TRUE) is the other spelling noted in the original
# sketch.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, sum(vv))
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE, reduce = TRUE)
)
# Embarrassingly parallel: records are keyed by group(k, v) and each group's
# values are handed to the user-defined aggregate().
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, aggregate(vv))
  }
)

# Embarrassingly parallel, vectorized variant (map side only).
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, aggregate(vv))
  },
  vectorized = list(map = TRUE)
)
# Embarrassingly parallel, vectorized and structured variant.
# Changes from the original sketch: the comma missing after the reduce
# argument is restored (the call did not parse), and the vectorized options
# are spelled the same way as in the other vectorized examples.
# NOTE(review): some.function is not defined in this sketch; presumably a
# user-supplied aggregation playing the role of aggregate() -- confirm.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v, vectorized = TRUE)
  },
  reduce = function(k, vv) {
    keyval(k, some.function(vv))
  },
  vectorized = list(map = TRUE),
  structured = list(map = TRUE)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment