# laclefyoshi/mapreduce_main.py

## mapreduce_main.py
import glob
import operator
import time

from mapreduce import *


if __name__ == "__main__":
    # Script entry point: run the map/reduce word count over the text files
    # matched below, then print the most and least frequent words and the
    # elapsed wall-clock time.
    # MapReduce, file_to_words and count_words are not defined in this file;
    # presumably they come from the wildcard import of `mapreduce` -- confirm.
    start_time = time.time()
    input_files = glob.glob("Edgar Allan Poe/*.txt")
    mapper = MapReduce(file_to_words, count_words)
    word_counts = mapper(input_files)

    # Ascending sort on the second field of each (word, count) pair followed
    # by an in-place reversal, so the highest counts come first.  Kept as two
    # steps on purpose: sort(reverse=True) would order equal counts
    # differently and change which ties make it into the report.
    word_counts.sort(key=operator.itemgetter(1))
    word_counts.reverse()

    limit = 30
    sections = [
        ("TOP", word_counts[:limit]),
        # A negative-step slice stops *before* its end index, so the end must
        # be -(limit + 1) to walk back over `limit` entries; the previous
        # [:-30:-1] yielded only 29 words under a "BOTTOM 30" heading.
        ("BOTTOM", word_counts[:-(limit + 1):-1]),
    ]
    for label, entries in sections:
        print("\n%s %d WORDS BY FREQUENCY\n" % (label, limit))
        if not entries:
            # No words were counted (presumably the glob matched no files);
            # max() below would raise ValueError on an empty sequence.
            continue
        # Pad every word to the longest one in this section so the counts
        # line up in a single column.
        longest = max(len(word) for word, count in entries)
        for word, count in entries:
            print("%-*s: %5s" % (longest + 1, word, count))

    # Single-argument print(...) produces the same output as a Python 2 print
    # statement and is also valid on Python 3.  The two spaces after the colon
    # reproduce what the former two-item print statement emitted.
    print("\n\nprocessing time (sec):  %s" % (time.time() - start_time))
	import glob
	import operator
	import time

	from mapreduce import *


	if __name__ == "__main__":
		# NOTE(review): this tab-indented section repeats the script at the
		# top of the file statement for statement; confirm whether it is a
		# paste artifact that should be removed.
		#
		# Script entry point: run the map/reduce word count over the text
		# files matched below, then print the most and least frequent words
		# and the elapsed wall-clock time.
		# MapReduce, file_to_words and count_words are not defined in this
		# file; presumably they come from the wildcard import of `mapreduce`
		# -- confirm.
		start_time = time.time()
		input_files = glob.glob("Edgar Allan Poe/*.txt")
		mapper = MapReduce(file_to_words, count_words)
		word_counts = mapper(input_files)

		# Ascending sort on the second field of each (word, count) pair
		# followed by an in-place reversal, so the highest counts come first.
		# Kept as two steps on purpose: sort(reverse=True) would order equal
		# counts differently.
		word_counts.sort(key=operator.itemgetter(1))
		word_counts.reverse()

		limit = 30
		sections = [
			("TOP", word_counts[:limit]),
			# A negative-step slice stops *before* its end index, so the end
			# must be -(limit + 1) to walk back over `limit` entries; the
			# previous [:-30:-1] yielded only 29 words.
			("BOTTOM", word_counts[:-(limit + 1):-1]),
		]
		for label, entries in sections:
			print("\n%s %d WORDS BY FREQUENCY\n" % (label, limit))
			if not entries:
				# No words were counted (presumably the glob matched no
				# files); max() below would raise ValueError on an empty
				# sequence.
				continue
			# Pad every word to the longest one in this section so the counts
			# line up in a single column.
			longest = max(len(word) for word, count in entries)
			for word, count in entries:
				print("%-*s: %5s" % (longest + 1, word, count))

		# Single-argument print(...) produces the same output as a Python 2
		# print statement and is also valid on Python 3.  The two spaces
		# after the colon reproduce what the former two-item print statement
		# emitted.
		print("\n\nprocessing time (sec):  %s" % (time.time() - start_time))