diogommartins/mapreducemax.py

## mapreducemax.py
import multiprocessing
import operator
import string
from functools import reduce
import itertools


def remove_punctuation(text):
    """Return ``text`` with every ``string.punctuation`` character deleted.

    Characters that are not listed in ``string.punctuation`` (letters,
    digits, whitespace, ...) are passed through unchanged.
    """
    # The third argument of str.maketrans lists the characters to delete.
    deletions = str.maketrans('', '', string.punctuation)
    return text.translate(deletions)


def map_func(text):
    """Count the alphabetic words found in ``text``.

    Punctuation is stripped with ``remove_punctuation`` and the result is
    split on whitespace. Tokens that are not purely alphabetic (according to
    ``str.isalpha``) are ignored; the remaining ones are lower-cased before
    being counted.

    Returns a dict mapping each lower-cased word to its occurrence count.
    """
    counts = {}  # auxiliary hash table: word -> occurrences

    for token in remove_punctuation(text).split():
        if not token.isalpha():
            continue
        key = token.lower()
        counts[key] = counts.get(key, 0) + 1
    return counts


def reduce_func(a, b):
    """Merge the counts in ``b`` into ``a`` and return ``a``.

    ``a`` is updated in place: values of keys present in both mappings are
    added together, and keys found only in ``b`` are copied over. ``b`` is
    not modified.
    """
    for word, occurrences in b.items():
        if word in a:
            a[word] += occurrences
        else:
            a[word] = occurrences
    return a


def main(path='./lorem.txt'):
    """Print how often each word occurs in the file at ``path``.

    Every non-blank line of the file is handed to ``map_func`` in a pool of
    worker processes, the per-line tables are merged with ``reduce_func``,
    and the words are printed ordered by count, highest first.
    """
    with open(path) as f:
        # Keep every line except the ones that consist of a lone '\n'.
        paragraphs = [line for line in f if line != '\n']

    # map -> build one occurrence table per line. The context manager makes
    # sure the worker processes are shut down once the results are in.
    with multiprocessing.Pool() as pool:
        map_values = pool.map(map_func, paragraphs)

    # reduce -> fold all tables into a single one. The empty-dict initializer
    # keeps reduce() from raising TypeError when there are no lines to count.
    reduced_values = reduce(reduce_func, map_values, {})

    # Order the (word, count) pairs by count, descending.
    sorted_desc = sorted(reduced_values.items(), key=operator.itemgetter(1), reverse=True)

    for word, count in sorted_desc:
        print("{word} tem {count} ocorrencias".format(word=word, count=count))


# Worker processes started with the "spawn" method re-import this module;
# without the guard each of them would try to create its own pool.
if __name__ == '__main__':
    main()
	import multiprocessing
	import operator
	import string
	from functools import reduce
	import itertools


	def remove_punctuation(text):
	    """Return ``text`` with every ``string.punctuation`` character deleted.

	    Characters that are not listed in ``string.punctuation`` (letters,
	    digits, whitespace, ...) are passed through unchanged.
	    """
	    # The third argument of str.maketrans lists the characters to delete.
	    translator = str.maketrans('', '', string.punctuation)

	    return text.translate(translator)


	def map_func(text):
	    """Count the alphabetic words found in ``text``.

	    Punctuation is stripped with ``remove_punctuation`` and the result is
	    split on whitespace. Tokens that are not purely alphabetic (according
	    to ``str.isalpha``) are ignored; the remaining ones are lower-cased
	    before being counted.

	    Returns a dict mapping each lower-cased word to its occurrence count.
	    """
	    table = {}  # auxiliary hash table: word -> occurrences

	    for word in remove_punctuation(text).split():
	        if word.isalpha():
	            word = word.lower()
	            table[word] = table.get(word, 0) + 1
	    return table


	def reduce_func(a, b):
	    """Merge the counts in ``b`` into ``a`` and return ``a``.

	    ``a`` is updated in place: values of keys present in both mappings are
	    added together, and keys found only in ``b`` are copied over. ``b`` is
	    not modified.
	    """
	    for k, v in b.items():
	        if k in a:
	            a[k] += v
	        else:
	            a[k] = v
	    return a


	def main(path='./lorem.txt'):
	    """Print how often each word occurs in the file at ``path``.

	    Every non-blank line of the file is handed to ``map_func`` in a pool
	    of worker processes, the per-line tables are merged with
	    ``reduce_func``, and the words are printed ordered by count, highest
	    first.
	    """
	    with open(path) as f:
	        # Keep every line except the ones that consist of a lone '\n'.
	        paragraphs = [line for line in f if line != '\n']

	    # map -> build one occurrence table per line. The context manager
	    # makes sure the worker processes are shut down once results are in.
	    with multiprocessing.Pool() as pool:
	        map_values = pool.map(map_func, paragraphs)

	    # reduce -> fold all tables into a single one. The empty-dict
	    # initializer keeps reduce() from raising TypeError on empty input.
	    reduced_values = reduce(reduce_func, map_values, {})

	    # Order the (word, count) pairs by count, descending.
	    sorted_desc = sorted(reduced_values.items(), key=operator.itemgetter(1), reverse=True)

	    for word, count in sorted_desc:
	        print("{word} tem {count} ocorrencias".format(word=word, count=count))


	# Worker processes started with the "spawn" method re-import this module;
	# without the guard each of them would try to create its own pool.
	if __name__ == '__main__':
	    main()