Skip to content

Instantly share code, notes, and snippets.

@jongwook
Created September 23, 2014 13:48
Show Gist options
  • Save jongwook/4758ed89b667b751c860 to your computer and use it in GitHub Desktop.
Save jongwook/4758ed89b667b751c860 to your computer and use it in GitHub Desktop.
Julia Hadoop Streaming
# Minimal helpers for writing Hadoop Streaming jobs in Julia.
#
# Records are text lines whose fields are separated by tab characters.
# User-supplied mappers and reducers are run as Tasks and emit each output
# record by calling `produce(fields)`, where `fields` is a collection (tuple
# or array) that is joined with tabs and written as one line.
module MapReduce

# Split one raw input line into its tab-separated fields.
# Only the line terminator is removed: stripping all surrounding whitespace
# would also eat leading/trailing tabs and silently drop empty edge fields
# (for example the empty value in "key\t\n").
splitfields(line) = split(chomp(line), '\t')

# Run `mapper` once per input line.
#
# mapper  called as `mapper(fields...)` with the tab-separated fields of the
#         line; it must `produce` one collection per output record. Its
#         return value is ignored.
# input   stream the lines are read from (defaults to STDIN).
# output  stream the records are written to (defaults to STDOUT).
function map(mapper::Function, input::IO=STDIN, output::IO=STDOUT)
    for line in eachline(input)
        fields = splitfields(line)
        for record in @task mapper(fields...)
            println(output, join(record, '\t'))
        end
    end
end

# Look-ahead state shared between `reduce` and its line reader.
# key     key of the group currently being reduced, or `nothing` when the
#         input is exhausted (or nothing has been read yet).
# params  value fields of the first line of that group; that line had to be
#         read in order to detect the key change.
type ReduceContext
    key
    params
end

# Run `reducer` once per run of consecutive lines sharing the same first field.
#
# reducer  called as `reducer(key, values)`; `values` is a Task yielding, for
#          each line of the group, the array of fields after the key. It can
#          be iterated with `for v in values` or read with `consume(values)`.
#          The reducer must `produce` one collection per output record.
# input    stream the lines are read from (defaults to STDIN); lines with
#          equal keys are expected to be adjacent, as in sorted Hadoop
#          Streaming reducer input.
# output   stream the records are written to (defaults to STDOUT).
function reduce(reducer::Function, input::IO=STDIN, output::IO=STDOUT)
    ctx = ReduceContext(nothing, nothing)

    # Producer body for one group: re-emits the look-ahead line, then streams
    # lines until the key changes (stashing the new group's first line in
    # `ctx`) or the input ends (clearing `ctx.key`).
    reader = () -> begin
        if ctx.params !== nothing
            produce(ctx.params)
        end
        for line in eachline(input)
            fields = splitfields(line)
            key, params = fields[1], fields[2:end]
            if key != ctx.key
                ctx.key, ctx.params = key, params
                return
            end
            produce(params)
        end
        ctx.key = nothing
    end

    # Prime the look-ahead with the first line; produces nothing because
    # `ctx.params` is still unset.
    consume(@task reader())

    while ctx.key !== nothing
        group = @task reader()
        for record in @task reducer(ctx.key, group)
            println(output, join(record, '\t'))
        end
        # The reducer may have stopped early (or never read its values).
        # Drain the rest of the group so the look-ahead always advances to
        # the next key; otherwise the same key would be reduced again, or
        # the loop would never terminate.
        while !istaskdone(group)
            consume(group)
        end
    end
end

end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment