Skip to content

Instantly share code, notes, and snippets.

@asimihsan
Created February 18, 2014 15:57
Show Gist options
  • Save asimihsan/9073719 to your computer and use it in GitHub Desktop.
Save asimihsan/9073719 to your computer and use it in GitHub Desktop.
Lesson 5 - MapReduce. Mapper and reducer with aadhaar data.
import csv
import sys
import string
def mapper(input=sys.stdin):
    """Emit one ``District<TAB>Aadhaar generated`` line per CSV data row.

    Args:
        input: An iterable of CSV text lines (defaults to ``sys.stdin``).
            The first line is consumed by ``csv.DictReader`` as the header
            row and must name the ``District`` and ``Aadhaar generated``
            columns.

    Raises:
        KeyError: If the header row lacks either required column.
    """
    reader = csv.DictReader(input)
    for row in reader:  # iterate over the CSV data rows
        # Single-argument parenthesised print produces identical output on
        # Python 2 (print statement) and Python 3 (print function).
        print("%s\t%s" % (row["District"], row["Aadhaar generated"]))
# Script entry point: run the mapper over its default input (sys.stdin).
mapper()
import sys
def reducer(input=sys.stdin):
    """Sum the integer values of consecutive lines that share a key.

    Each non-blank input line is expected to look like ``key<TAB>value``.
    For every run of adjacent lines with the same key, one line
    ``key<TAB>total`` is printed, with the total formatted to one decimal
    place (``%.1f``).

    Only the previous key is remembered, so equal keys must be adjacent in
    the input (e.g. pre-sorted by key); otherwise the same key is reported
    more than once.

    Args:
        input: An iterable of text lines (defaults to ``sys.stdin``).

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields, or its value is not an integer literal.
    """
    # Lazily split every non-blank line into its [key, value] fields.
    elems = (line.strip().split("\t") for line in input if line.strip())
    aadhaar_generated = 0
    old_key = None
    for (key, value) in elems:
        if old_key != key:
            # The key changed: flush the finished group before starting
            # the next one (nothing to flush before the very first key).
            if old_key is not None:
                # Parenthesised single-argument print is valid and
                # byte-identical on both Python 2 and Python 3.
                print("%s\t%.1f" % (old_key, aadhaar_generated))
            old_key = key
            aadhaar_generated = 0
        aadhaar_generated += int(value)
    # Flush the last group, which no later key change will trigger.
    if old_key is not None:
        print("%s\t%.1f" % (old_key, aadhaar_generated))
# Script entry point: run the reducer over its default input (sys.stdin).
reducer()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment