-
-
Save zdzolton/1568840 to your computer and use it in GitHub Desktop.
Hadoop Streaming with Node.js (CoffeeScript edition)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env coffee
# Hadoop Streaming mapper.
#
# Reads text from stdin, splits it into lines, and for every line writes
# "<field>,1" to stdout, where <field> is the space-separated token at
# index FIELD_INDEX.
# NOTE(review): index 8 presumably targets a specific column of the input
# log format -- confirm against the actual input data.

FIELD_INDEX = 8

stdin = process.stdin
stdout = process.stdout

# Text received so far that has not yet been terminated by a newline.
input = ''

# Emit one "<field>,1" record for a single input line.
# Lines with too few fields are skipped instead of emitting "undefined,1".
proc = (line) ->
  words = line.split ' '
  return unless words.length > FIELD_INDEX
  stdout.write "#{words[FIELD_INDEX]},1\n"
  return

stdin.setEncoding 'utf8'

stdin.on 'data', (data) ->
  return unless data?
  input += data
  # Everything before the last line break is a complete line; the final
  # piece (possibly empty) is an unterminated remainder kept for the next
  # chunk. This avoids the deprecated RegExp.leftContext/rightContext statics.
  lines = input.split /\r?\n/
  input = lines.pop()
  proc line for line in lines
  return

stdin.on 'end', ->
  # Flush a final line that had no trailing newline; an empty remainder
  # is not a line and must not produce a record.
  proc input if input
  return
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env coffee
# Hadoop Streaming reducer.
#
# Reads "<key>,<count>" records from stdin (one per line), sums the counts
# per key, and at end of input writes one "<key>:<total>" line per key to
# stdout.

stdin = process.stdin
stdout = process.stdout

# Running totals per key. A prototype-less object is used so that keys such
# as "constructor" or "toString" cannot collide with inherited properties.
counter = Object.create null

# Text received so far that has not yet been terminated by a newline.
input = ''

# Fold a single "<key>,<count>" record into the running totals.
proc = (line) ->
  [word, rawCount] = line.split ','
  # Counts are decimal integers; pass the radix explicitly.
  count = parseInt rawCount, 10
  # Blank or malformed records (no parseable count) are ignored so that
  # they neither create bogus keys nor turn a total into NaN.
  return if isNaN count
  # Start from 0 so the first record contributes its real count, not 1.
  counter[word] = (counter[word] ? 0) + count
  return

stdin.setEncoding 'utf8'

stdin.on 'data', (data) ->
  return unless data?
  input += data
  # Everything before the last line break is a complete line; the final
  # piece (possibly empty) is an unterminated remainder kept for the next
  # chunk. This avoids the deprecated RegExp.leftContext/rightContext statics.
  lines = input.split /\r?\n/
  input = lines.pop()
  proc line for line in lines
  return

stdin.on 'end', ->
  # Flush a final record that had no trailing newline.
  proc input if input
  stdout.write "#{k}:#{v}\n" for k, v of counter
  return
Also, the original code's use of the RegExp object's leftContext and rightContext properties was a snazzy feature (though I thought those properties were obsolete?). Hmm...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My first pass at translating an existing JavaScript gist to CoffeeScript. This could also help me learn about Hadoop. :)