# Eyad-Bereh/MultiFilesWordsFrequencies.py

## MultiFilesWordsFrequencies.py
import re
import sys

def WordFrequency(filename, case_sensitive=True):
    """Count how often each word occurs in a text file.

    The file content is split on whitespace and on the punctuation
    characters ``, . ? !``; empty fragments are discarded.

    Args:
        filename: Path of the text file to read.
        case_sensitive: When True (the default) words are counted exactly
            as written. When False every word is lower-cased first, so
            "The" and "the" share one counter.

    Returns:
        A dict mapping each word to its number of occurrences, or -1 when
        the file does not exist, is a directory, or cannot be opened due
        to permissions (an explanatory message is printed in those cases).
    """
    # A run of separator characters acts as a single delimiter.
    split_pattern = r"[\s,.?!]+"

    # Exceptions might occur, we don't want to print the native ugly error messages
    try:
        # The context manager closes the file even when read() fails.
        with open(filename, "r") as file:
            file_content = file.read()

    except FileNotFoundError:
        print("Error in WordFrequency(): The file you've requested ({0}) wasn't found.".format(filename))
        return -1

    except IsADirectoryError:
        print("Error in WordFrequency(): ({0}) isn't a file , it's a directory.".format(filename))
        return -1

    except PermissionError:
        print("Error in WordFrequency(): Encountered a permission error for file ({0}).".format(filename))
        return -1

    # Leading/trailing separators need no special stripping: they only
    # produce empty fragments, which are dropped here.
    words = [word for word in re.split(split_pattern, file_content) if word]

    dictionary = {}
    for word in words:
        # If we don't care about case, keys are stored in lower case.
        key = word if case_sensitive else word.lower()
        dictionary[key] = dictionary.get(key, 0) + 1
    return dictionary

def WordReducer(dictionary_1, dictionary_2):
    """Merge two word-count dictionaries into a new dictionary.

    Words found in both inputs get the sum of their counts; words found
    in only one input keep that count. Neither input is modified.

    Args:
        dictionary_1: First mapping of word -> count.
        dictionary_2: Second mapping of word -> count.

    Returns:
        A new dict holding the combined counts.
    """
    # Start from a shallow copy of the first mapping.
    merged = dict(dictionary_1)

    for word, count in dictionary_2.items():
        current = merged.get(word)
        merged[word] = count if current is None else current + count

    return merged

def PrettyPrintDictionary(dictionary, key_label, value_label):
    """Print a dictionary as a two-column table on standard output.

    The table is preceded and followed by two blank lines. Each cell is
    centred in a 20-character column and the columns are separated by "|".

    Args:
        dictionary: Mapping whose keys and values form the table rows.
        key_label: Heading of the first (key) column.
        value_label: Heading of the second (value) column.
    """
    format_row = "{:^20}|{:^20}".format

    print(end="\n\n")  # two blank lines before the table
    print(format_row(key_label, value_label))
    print("_______________________________________")
    for key, value in dictionary.items():
        print(format_row(key, value))
    print(end="\n\n")  # two blank lines after the table


def main():
    """Count word frequencies across several files and print the total.

    File names come from the command-line arguments; when none are given
    the user is prompted for a whitespace-separated list. At least two
    names are required, otherwise a message is printed and the program
    exits. Files that WordFrequency() rejects (it returns -1) are skipped
    with a notice. Words are counted case-insensitively.
    """
    if len(sys.argv) == 1:
        print("Welcome to multi-files words frequencies counter.")
        answer = input("To start , please enter files names separated by spaces:\n")
        files = answer.split()  # splits on whitespace runs, no empty names
        if len(files) < 2:
            print("At least 2 files must be supplied as input")
            sys.exit()
    elif len(sys.argv) == 2:
        print("At least 2 files must be supplied in terminal arguments")
        sys.exit()
    else:
        files = sys.argv[1:]

    dictionaries = []
    for file in files:
        dictionary = WordFrequency(file, False)
        if dictionary == -1:
            print("Ignoring file ({0}).\n".format(file))
        else:
            dictionaries.append(dictionary)

    # Every file may have been rejected; indexing an empty list would crash.
    if not dictionaries:
        print("None of the supplied files could be read.")
        return

    result = dictionaries[0]
    for dictionary in dictionaries[1:]:
        result = WordReducer(result, dictionary)
    PrettyPrintDictionary(result, "Word", "Frequency")


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()
	import re
	import sys

	def WordFrequency(filename, case_sensitive=True):
	    """Count how often each word occurs in a text file.

	    The file content is split on whitespace and on the punctuation
	    characters ``, . ? !``; empty fragments are discarded.

	    Args:
	        filename: Path of the text file to read.
	        case_sensitive: When True (the default) words are counted exactly
	            as written. When False every word is lower-cased first, so
	            "The" and "the" share one counter.

	    Returns:
	        A dict mapping each word to its number of occurrences, or -1 when
	        the file does not exist, is a directory, or cannot be opened due
	        to permissions (an explanatory message is printed in those cases).
	    """
	    # A run of separator characters acts as a single delimiter.
	    split_pattern = r"[\s,.?!]+"

	    # Exceptions might occur, we don't want to print the native ugly error messages
	    try:
	        # The context manager closes the file even when read() fails.
	        with open(filename, "r") as file:
	            file_content = file.read()

	    except FileNotFoundError:
	        print("Error in WordFrequency(): The file you've requested ({0}) wasn't found.".format(filename))
	        return -1

	    except IsADirectoryError:
	        print("Error in WordFrequency(): ({0}) isn't a file , it's a directory.".format(filename))
	        return -1

	    except PermissionError:
	        print("Error in WordFrequency(): Encountered a permission error for file ({0}).".format(filename))
	        return -1

	    # Leading/trailing separators need no special stripping: they only
	    # produce empty fragments, which are dropped here.
	    words = [word for word in re.split(split_pattern, file_content) if word]

	    dictionary = {}
	    for word in words:
	        # If we don't care about case, keys are stored in lower case.
	        key = word if case_sensitive else word.lower()
	        dictionary[key] = dictionary.get(key, 0) + 1
	    return dictionary

	def WordReducer(dictionary_1, dictionary_2):
	    """Merge two word-count dictionaries into a new dictionary.

	    Words found in both inputs get the sum of their counts; words found
	    in only one input keep that count. Neither input is modified.

	    Args:
	        dictionary_1: First mapping of word -> count.
	        dictionary_2: Second mapping of word -> count.

	    Returns:
	        A new dict holding the combined counts.
	    """
	    # Start from a shallow copy of the first mapping.
	    dictionary = dict(dictionary_1)

	    for word, count in dictionary_2.items():
	        dictionary[word] = dictionary.get(word, 0) + count

	    return dictionary

	def PrettyPrintDictionary(dictionary, key_label, value_label):
	    """Print a dictionary as a two-column table on standard output.

	    The table is preceded and followed by two blank lines. Each cell is
	    centred in a 20-character column and the columns are separated by "|".

	    Args:
	        dictionary: Mapping whose keys and values form the table rows.
	        key_label: Heading of the first (key) column.
	        value_label: Heading of the second (value) column.
	    """
	    print()
	    print()
	    # Plain "|" separator: an escaped "\|" would print a stray backslash.
	    print("{:^20}|{:^20}".format(key_label, value_label))
	    print("_______________________________________")
	    for word in dictionary:
	        print("{:^20}|{:^20}".format(word, dictionary[word]))
	    print()
	    print()


	def main():
	    """Count word frequencies across several files and print the total.

	    File names come from the command-line arguments; when none are given
	    the user is prompted for a whitespace-separated list. At least two
	    names are required, otherwise a message is printed and the program
	    exits. Files that WordFrequency() rejects (it returns -1) are skipped
	    with a notice. Words are counted case-insensitively.
	    """
	    if len(sys.argv) == 1:
	        print("Welcome to multi-files words frequencies counter.")
	        answer = input("To start , please enter files names separated by spaces:\n")
	        files = answer.split()  # splits on whitespace runs, no empty names
	        if len(files) < 2:
	            print("At least 2 files must be supplied as input")
	            sys.exit()
	    elif len(sys.argv) == 2:
	        print("At least 2 files must be supplied in terminal arguments")
	        sys.exit()
	    else:
	        files = sys.argv[1:]

	    dictionaries = []
	    for file in files:
	        dictionary = WordFrequency(file, False)
	        if dictionary == -1:
	            print("Ignoring file ({0}).\n".format(file))
	        else:
	            dictionaries.append(dictionary)

	    # Every file may have been rejected; indexing an empty list would crash.
	    if not dictionaries:
	        print("None of the supplied files could be read.")
	        return

	    result = dictionaries[0]
	    for dictionary in dictionaries[1:]:
	        result = WordReducer(result, dictionary)
	    PrettyPrintDictionary(result, "Word", "Frequency")


	# Run only when executed as a script, so importing this module has no side effects.
	if __name__ == "__main__":
	    main()