-
-
Save k4v/5d0d1425977fe7e228e7a1e538f72d68 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mrjob.job import MRJob | |
import re | |
# A "word" is a maximal run of word characters (letters, digits, underscore)
# and apostrophes, so contractions such as "don't" stay in one piece.
# Compiled once at import time because the mapper applies it to every line.
WORD_RE = re.compile(r"[\w']+")
class MRWordFreqCount(MRJob):
    """Count how often each word occurs in the input, ignoring case.

    Words are whatever ``WORD_RE`` matches; each one is lower-cased before
    counting, so "The" and "the" contribute to the same total.
    """

    def mapper(self, _, line):
        """Emit ``(word, 1)`` for every word found in ``line``.

        The first argument (the input key) is not used.
        """
        for word in WORD_RE.findall(line):
            yield word.lower(), 1

    def combiner(self, word, counts):
        """Emit ``(word, partial_total)`` for the counts seen so far.

        Summing early is safe because addition is associative: the reducer
        can add partial totals just as it would add individual ones.
        """
        yield word, sum(counts)

    def reducer(self, word, counts):
        """Emit ``(word, total)``, the sum of all counts for ``word``."""
        yield word, sum(counts)
# Launch the job only when this file is executed as a script, so importing
# the module (e.g. to reuse MRWordFreqCount) has no side effects.
if __name__ == '__main__':
    MRWordFreqCount.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment