Skip to content

Instantly share code, notes, and snippets.

@piccolbo
Created March 22, 2012 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save piccolbo/2163002 to your computer and use it in GitHub Desktop.
Save piccolbo/2163002 to your computer and use it in GitHub Desktop.
Sketch of rmr future vector API, devil's version
# predicate, group and aggregate are user-defined functions;
# wherever the vectorized API is used, vectorized versions of them are assumed
# Vectorized input format: `nrecs` says how many records are processed in
# one map call.
native.1000 <- make.input.format(nrecs = 1000)
# Pass-through: the map function hands each key/value pair straight to keyval.
mapreduce(
  input,
  map = function(k, v) {
    keyval(k, v)
  }
)

# Vectorized pass-through: the input format's nrecs says how many records
# one map call processes, and vec.keyval takes the place of keyval.
mapreduce(
  input,
  map = function(k, v) {
    vec.keyval(k, v)
  },
  input.format = native.1000
)
# Filter: keyval is called only when the user-defined predicate holds;
# otherwise the map function returns NULL.
mapreduce(
  input,
  map = function(k, v) {
    if (predicate(k, v)) {
      keyval(k, v)
    }
  }
)

# Vectorized filter: the result of predicate(k, v) is used as an index into
# both k and v before they are passed to vec.keyval.
mapreduce(
  input,
  map = function(k, v) {
    keep <- predicate(k, v)
    vec.keyval(k[keep], v[keep])
  },
  input.format = native.1000
)

# Vectorized filter, structured case: k and v go through structured() first,
# and the predicate result then selects rows of each.
mapreduce(
  input,
  map = function(k, v) {
    k <- structured(k)
    v <- structured(v)
    keep <- predicate(k, v)
    vec.keyval(k[keep, ], v[keep, ])
  },
  input.format = native.1000
)
#select TODO
# Big sum: the map keys each record by the user-defined group(k, v); the
# reduce flattens the values of a key with unlist and sums them.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, sum(unlist(vv)))
  }
)

# Vectorized big sum: same reduce, map emits through vec.keyval.
mapreduce(
  input,
  map = function(k, v) {
    vec.keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, sum(unlist(vv)))
  },
  input.format = native.1000
)

# Vectorized big sum, structured case: k and v are passed through
# structured() in the map, and the reduce sums structured(vv).
mapreduce(
  input,
  map = function(k, v) {
    k <- structured(k)
    v <- structured(v)
    vec.keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, sum(structured(vv)))
  },
  input.format = native.1000
)
# Embarrassingly parallel: the map keys each record by the user-defined
# group(k, v); the reduce applies the user-defined aggregate() to the values
# of a key.
mapreduce(
  input,
  map = function(k, v) {
    keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    keyval(k, aggregate(vv))
  }
)

# Vectorized version: both map and reduce emit through vec.keyval.
mapreduce(
  input,
  map = function(k, v) {
    vec.keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    vec.keyval(k, aggregate(vv))
  },
  input.format = native.1000
)
# Vectorized version, structured case: the map keys each record by the
# user-defined group(k, v) and the reduce emits the result of
# some.function(vv) under the same key.
# NOTE(review): some.function is not among the user-defined functions named
# in the header (predicate, group, aggregate); it presumably stands in for
# aggregate -- confirm.
# NOTE(review): map.on.data.frame / reduce.on.data.frame presumably ask
# mapreduce to hand data frames to map and reduce -- confirm against the API.
mapreduce(
  input,
  map = function(k, v) {
    vec.keyval(group(k, v), v)
  },
  reduce = function(k, vv) {
    vec.keyval(k, some.function(vv))
  },
  map.on.data.frame = TRUE,
  reduce.on.data.frame = TRUE,
  input.format = native.1000
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment