# cjoshmartin/main.py

## main.py
# Download this file and make sure Python 3.8 or newer is installed.
# Run "python main.py" from a directory that contains
# alice_in_wonderland.txt and 1-1000.txt.

from pathlib import Path
import collections
import re


def top_n_words(input_file: str, common_words: str, n: int) -> None:
    """Print the ``n`` most frequent words of a text, skipping common words.

    Words are compared case-insensitively. The result is written to stdout
    as a two-column table (count, word), ordered from the highest count to
    the lowest; words with equal counts keep the order in which they first
    appear in the text.

    Args:
        input_file: Path of the text file to analyse.
        common_words: Path of a file listing one word per line; these words
            are left out of the count.
        n: Maximum number of rows to print.
    """
    # split() with no argument breaks on any run of whitespace, so words on
    # neighbouring lines stay separate and no empty tokens are produced.
    # NOTE: punctuation stays attached to a word ("alice," != "alice").
    words = Path(input_file).read_text().lower().split()

    # Normalise the exclusion list the same way as the input (lowercase,
    # surrounding whitespace removed) so entries such as "I" still match.
    excluded = {
        line.strip().lower()
        for line in Path(common_words).read_text().splitlines()
    }

    counts = collections.Counter(
        word for word in words if word not in excluded
    )

    print(f"{'Count':<6} {'Word':<6}")
    print(f"{'===':<6} {'===':<6}")
    # most_common() performs the by-count ordering.
    for word, count in counts.most_common(n):
        print(f"{count:<6} {word:<6}")


if __name__ == '__main__':
    # Script entry point: report on the sample text, filtered by the
    # common-word list, limited to five rows.
    top_n_words(
        input_file='alice_in_wonderland.txt',
        common_words='1-1000.txt',
        n=5,
    )
	# Download this file and make sure Python 3.8 or newer is installed.
	# Run "python main.py" from a directory that contains
	# alice_in_wonderland.txt and 1-1000.txt.

	from pathlib import Path
	import collections
	import re


	def top_n_words(input_file: str, common_words: str, n: int) -> None:
	    """Print the ``n`` most frequent words of a text, skipping common words.

	    Words are compared case-insensitively. The result is written to stdout
	    as a two-column table (count, word), ordered from the highest count to
	    the lowest; words with equal counts keep the order in which they first
	    appear in the text.

	    Args:
	        input_file: Path of the text file to analyse.
	        common_words: Path of a file listing one word per line; these words
	            are left out of the count.
	        n: Maximum number of rows to print.
	    """
	    # split() with no argument breaks on any run of whitespace, so words on
	    # neighbouring lines stay separate and no empty tokens are produced.
	    # NOTE: punctuation stays attached to a word ("alice," != "alice").
	    words = Path(input_file).read_text().lower().split()

	    # Normalise the exclusion list the same way as the input (lowercase,
	    # surrounding whitespace removed) so entries such as "I" still match.
	    excluded = {
	        line.strip().lower()
	        for line in Path(common_words).read_text().splitlines()
	    }

	    counts = collections.Counter(
	        word for word in words if word not in excluded
	    )

	    print(f"{'Count':<6} {'Word':<6}")
	    print(f"{'===':<6} {'===':<6}")
	    # most_common() performs the by-count ordering.
	    for word, count in counts.most_common(n):
	        print(f"{count:<6} {word:<6}")


	if __name__ == '__main__':
	    # Script entry point: report on the sample text, filtered by the
	    # common-word list, limited to five rows.
	    top_n_words('alice_in_wonderland.txt', '1-1000.txt', 5)