Skip to content

Instantly share code, notes, and snippets.

@sc68cal
Created April 23, 2011 20:41
Show Gist options
  • Save sc68cal/938960 to your computer and use it in GitHub Desktop.
Save sc68cal/938960 to your computer and use it in GitHub Desktop.
MapReduce example
#!/usr/bin/env python
import sys
import re
# Matches a line containing "From", followed later by a timestamp shaped like
# "Sat Apr 23 20:41:00 2011". Captures weekday, month, day and year; the
# time of day is matched but deliberately not captured.
# The day accepts one or two digits after any run of whitespace so that
# space-padded days ("Apr  2") are not silently dropped.
prog = re.compile(
    r"From.*(\S{3})\s"
    r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+"
    r"(\d{1,2})\s\d{2}:\d{2}:\d{2}\s(\d{4})"
)


def extract_date_key(line):
    """Return "<weekday> <month> <day> <year>" for a matching line.

    The fields come from the regex capture groups rather than from
    positional indexes into a space-split string, so extra spaces earlier
    in the line cannot shift the wrong token into the key.

    Returns None when the line does not match ``prog``.
    """
    found = prog.search(line.strip())
    if found is None:
        return None
    return "%s %s %s %s" % found.groups()


def map_lines(lines):
    """Yield one tab-separated "<date key>, 1" record per matching line.

    ``lines`` is any iterable of strings; non-matching lines produce
    no output.
    """
    for line in lines:
        key = extract_date_key(line)
        if key is not None:
            yield "%s\t%s" % (key, 1)


# Only consume stdin when executed as a script, so the helpers above can be
# imported without side effects.
if __name__ == "__main__":
    for record in map_lines(sys.stdin):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(record)
#!/usr/bin/env python
import sys
def sum_counts(lines):
    """Total the counts of tab-separated "key<TAB>count" records.

    ``lines`` is any iterable of strings. Each line is stripped and split
    on its first tab; the text before the tab is the key and the text
    after it is parsed as an integer count.

    Records without a tab, or whose count is not an integer, are skipped.
    Previously a non-integer count was left as a string and crashed the
    addition with TypeError, and a line without a tab crashed the
    unpacking with ValueError.

    Returns a dict mapping each key to the sum of its counts.
    """
    totals = {}
    for line in lines:
        key, sep, raw_count = line.strip().partition("\t")
        if not sep:
            # No tab separator: not a key/count record.
            continue
        try:
            count = int(raw_count)
        except ValueError:
            # Count is not an integer: ignore this record.
            continue
        totals[key] = totals.get(key, 0) + count
    return totals


# Only consume stdin when executed as a script, so sum_counts can be
# imported without side effects.
if __name__ == "__main__":
    results = sum_counts(sys.stdin)
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    for date, count in results.items():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s\t%s' % (date, count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment