Created
February 29, 2016 14:11
-
-
Save s-leroux/5efeaec1e39c4070c407 to your computer and use it in GitHub Desktop.
Sample code demonstrating how to output advanced data types from a map-reduce job using mrjob.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mrjob.job import MRJob | |
import mrjob.protocol | |
from decimal import Decimal | |
class MRCustomerTotalExpenses(MRJob):
    """Map-reduce job that totals the amount spent by each customer.

    Each input line is expected to hold exactly three comma-separated
    fields: ``customerID,itemID,amount``. Amounts are handled as
    ``Decimal`` values from the mapper through to the final output.
    """

    # Protocol used for data passed between steps (mapper -> reducer);
    # chosen so Decimal values are not forced through the default encoding.
    INTERNAL_PROTOCOL = mrjob.protocol.PickleProtocol
    # Protocol used to write the final key/value pairs of the job.
    OUTPUT_PROTOCOL = mrjob.protocol.ReprProtocol

    def mapper(self, key, line):
        """Emit ``(customerID, Decimal(amount))`` for one input line.

        ``key`` is not used. Raises ``ValueError`` when the line does not
        split into exactly three fields; ``Decimal`` raises
        ``decimal.InvalidOperation`` when the amount is not a valid number.
        """
        fields = line.split(',')
        # The item identifier is parsed but plays no part in the total.
        customerID, _item_id, raw_amount = fields
        yield customerID, Decimal(raw_amount)

    def reducer(self, customerID, amounts):
        """Emit ``(customerID, total)`` where total is the sum of ``amounts``."""
        total = sum(amounts)
        yield customerID, total
# Launch the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    MRCustomerTotalExpenses.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment