Skip to content

Instantly share code, notes, and snippets.

@email2liyang
Last active November 6, 2017 14:34
Show Gist options
  • Save email2liyang/8ba2958b9675c7eed0b8de9dc4e32d7b to your computer and use it in GitHub Desktop.
Save email2liyang/8ba2958b9675c7eed0b8de9dc4e32d7b to your computer and use it in GitHub Desktop.
MapReduce (mrjob) job that ranks movies by popularity, i.e. by how many ratings each movie received
from mrjob.job import MRJob
from mrjob.step import MRStep
class PopularityBreakdown(MRJob):
    """Rank movies by popularity, measured as the number of ratings received.

    Each input line is expected to hold four tab-separated fields
    (userID, movieID, rating, timestamp); only the movieID is used.

    The job runs in two steps:

    1. Emit ``(movieID, 1)`` per rating and sum the ones per movie.
    2. Re-key every movie by its zero-padded rating count and emit
       ``(movieID, padded_count)`` pairs.

    The count is zero-padded so that comparing the keys as strings agrees
    with comparing them as numbers. The job relies on the framework
    handing keys to the second reducer in sorted order for the final
    output to come out ordered by popularity.
    """

    # Separator between the fields of one input record.
    FIELD_SEPARATOR = '\t'

    # Number of fields a well-formed input record has.
    EXPECTED_FIELDS = 4

    # Width the rating count is zero-padded to. Counts with more digits
    # than this are NOT truncated, but they no longer sort correctly as
    # strings (e.g. '100000' < '99999'); subclasses processing bigger
    # data sets should raise this value.
    COUNT_WIDTH = 5

    def steps(self):
        """Return the two MapReduce steps that make up this job."""
        return [
            MRStep(mapper=self.mapper_ratings,
                   reducer=self.reducer_ratings),
            MRStep(reducer=self.reducer_sortings),
        ]

    def mapper_ratings(self, _, line):
        """Emit ``(movieID, 1)`` for one rating record.

        Lines that do not split into exactly ``EXPECTED_FIELDS`` fields
        (blank lines, truncated records, ...) are skipped and tallied in
        the ``malformed_lines`` counter instead of aborting the whole job
        with a ``ValueError``.
        """
        fields = line.split(self.FIELD_SEPARATOR)
        if len(fields) != self.EXPECTED_FIELDS:
            self.increment_counter('PopularityBreakdown', 'malformed_lines', 1)
            return
        # Field order is userID, movieID, rating, timestamp.
        yield fields[1], 1

    def reducer_ratings(self, key, values):
        """Sum the ratings of one movie and emit ``(padded_count, movieID)``.

        The count becomes the key of the next step, hence the zero-padding
        to ``COUNT_WIDTH`` characters.
        """
        yield str(sum(values)).zfill(self.COUNT_WIDTH), key

    def reducer_sortings(self, count, movies):
        """Emit ``(movieID, padded_count)`` for every movie sharing a count."""
        for movie in movies:
            yield movie, count
if __name__ == "__main__":
    # Script entry point: let mrjob parse the command line and run the job.
    PopularityBreakdown.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment