Last active
November 6, 2017 14:34
-
-
Save email2liyang/8ba2958b9675c7eed0b8de9dc4e32d7b to your computer and use it in GitHub Desktop.
MapReduce (mrjob) job that calculates each movie's popularity, measured as the number of ratings it received
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mrjob.job import MRJob | |
from mrjob.step import MRStep | |
class PopularityBreakdown(MRJob):
    """Count how many ratings each movie received and emit movies by count.

    Each input line is expected to be a tab-separated record with four
    fields: ``userID``, ``movieID``, ``rating``, ``timestamp``.  Only the
    ``movieID`` field is used.

    The job runs in two steps:

    1. Count the ratings per movie (mapper + combiner + reducer).
    2. Re-key the results by the zero-padded count and emit
       ``(movieID, count)`` pairs.

    NOTE(review): the final ordering presumably relies on the framework
    sorting keys between steps (and on a single reducer for a global
    order) -- confirm against the runner configuration in use.
    """

    # Number of tab-separated fields in a well-formed input record.
    _FIELDS_PER_RECORD = 4

    # Width the rating count is zero-padded to, so that string comparison of
    # the padded counts agrees with numeric comparison.  Counts with more
    # digits than this are not padded and would compare incorrectly as
    # strings (e.g. '100000' < '99999').
    _COUNT_WIDTH = 5

    def steps(self):
        """Return the two MapReduce steps that make up this job."""
        return [
            MRStep(mapper=self.mapper_ratings,
                   combiner=self.combiner_ratings,
                   reducer=self.reducer_ratings),
            MRStep(reducer=self.reducer_sortings),
        ]

    def mapper_ratings(self, _, line):
        """Emit ``(movieID, 1)`` for every well-formed rating record.

        Lines that do not split into exactly four tab-separated fields
        (for example a blank trailing line) are skipped and tallied in a
        job counter instead of aborting the whole job.
        """
        fields = line.split('\t')
        if len(fields) != self._FIELDS_PER_RECORD:
            self.increment_counter(
                'PopularityBreakdown', 'malformed_lines', 1)
            return
        # Field order: userID, movieID, rating, timestamp.
        yield fields[1], 1

    def combiner_ratings(self, movieID, counts):
        """Pre-sum the partial counts for a movie before the shuffle.

        Summation is associative, so the totals computed by
        ``reducer_ratings`` are unchanged; only the amount of intermediate
        data is reduced.
        """
        yield movieID, sum(counts)

    def reducer_ratings(self, key, values):
        """Emit ``(zero-padded total, movieID)`` for one movie.

        ``key`` is the movie ID and ``values`` its partial rating counts.
        The total is rendered as a string padded to ``_COUNT_WIDTH`` digits.
        """
        yield str(sum(values)).zfill(self._COUNT_WIDTH), key

    def reducer_sortings(self, count, movies):
        """Emit ``(movieID, count)`` for every movie sharing ``count``."""
        for movie in movies:
            yield movie, count
# Start the job only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    PopularityBreakdown.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment