Skip to content

Instantly share code, notes, and snippets.

@laclefyoshi
Created March 5, 2011 04:31
Show Gist options
  • Save laclefyoshi/856115 to your computer and use it in GitHub Desktop.
Save laclefyoshi/856115 to your computer and use it in GitHub Desktop.
processing MapReduce with Akka/Actor and Jython
import glob
import operator
import time
from mapreduce import *
def format_ranking(entries):
    """Return aligned ``word : count`` report lines for *entries*.

    *entries* is a sequence of ``(word, count)`` pairs.  Every word is
    left-justified in a column one character wider than the longest word
    in *entries*, and every count is right-justified in a 5-character
    column.  An empty sequence yields an empty list instead of raising
    ``ValueError`` from ``max()``.
    """
    if not entries:
        return []
    width = max(len(word) for word, _count in entries) + 1
    return ["%-*s: %5s" % (width, word, count) for word, count in entries]


def bottom_entries(ranked, limit=30):
    """Return the last *limit* items of *ranked*, final item first.

    With a stop of ``-limit`` a reversed slice stops one element short
    (it returns ``limit - 1`` items), so the stop has to be
    ``-(limit + 1)`` to get exactly *limit* items.  Sequences shorter
    than *limit* are returned whole (reversed).
    """
    return ranked[:-(limit + 1):-1]


if __name__ == "__main__":
    start_time = time.time()

    input_files = glob.glob("Edgar Allan Poe/*.txt")
    mapper = MapReduce(file_to_words, count_words)
    word_counts = mapper(input_files)

    # Order by count, highest first.  sort() followed by reverse() is kept
    # (rather than sort(reverse=True)) so that entries with equal counts
    # stay in the same relative order the script has always produced.
    word_counts.sort(key=operator.itemgetter(1))
    word_counts.reverse()

    # Single-argument print(...) emits the same bytes under Python 2 /
    # Jython as the print statement, and also parses under Python 3.
    print("\nTOP 30 WORDS BY FREQUENCY\n")
    for line in format_ranking(word_counts[:30]):
        print(line)

    print("\nBOTTOM 30 WORDS BY FREQUENCY\n")
    for line in format_ranking(bottom_entries(word_counts, 30)):
        print(line)

    # Two spaces after the colon reproduce the separator that the Python 2
    # statement `print "...: ", value` inserted between its two items.
    print("\n\nprocessing time (sec):  %s" % (time.time() - start_time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment