Skip to content

Instantly share code, notes, and snippets.

@k4v
Created April 3, 2021 01:48
Show Gist options
  • Save k4v/5d0d1425977fe7e228e7a1e538f72d68 to your computer and use it in GitHub Desktop.
Save k4v/5d0d1425977fe7e228e7a1e538f72d68 to your computer and use it in GitHub Desktop.
from mrjob.job import MRJob
import re
# Matches a maximal run of word characters (\w) and apostrophes, so a
# contraction such as "don't" is kept together as one token.
WORD_RE = re.compile(r"[\w']+")
class MRWordFreqCount(MRJob):
    """Count how many times each word occurs in the input, ignoring case."""

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every ``WORD_RE`` match found in *line*.

        The first (key) argument is unused. Each word is lower-cased so
        that differently-cased spellings are counted under the same key.
        """
        for word in WORD_RE.findall(line):
            yield word.lower(), 1

    def combiner(self, word, counts):
        """Yield *word* with the sum of the partial *counts* seen so far.

        Summing is associative, so pre-aggregating here does not change
        the totals the reducer produces.
        """
        yield word, sum(counts)

    def reducer(self, word, counts):
        """Yield *word* with the sum of all *counts* received for it."""
        yield word, sum(counts)
if __name__ == '__main__':
    # Launch the job only when this file is executed as a script, so that
    # importing the module does not start a run.
    MRWordFreqCount.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment