Created
March 13, 2019 22:51
-
-
Save Eyad-Bereh/02a85ab7f0de86ce88959f9f456852fa to your computer and use it in GitHub Desktop.
A multi-file word-frequency counting program written in Python 3.6.8; I wrote this as a homework assignment.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import sys | |
def WordFrequency(filename, case_sensitive=True):
    """Count how often each word occurs in a text file.

    The file content is split on whitespace and on the punctuation
    characters ``, . ? !``; empty fragments are discarded.

    Args:
        filename: Path of the file to read.
        case_sensitive: When true (the default), words that differ only in
            letter case are counted separately; otherwise every word is
            lower-cased before it is counted.

    Returns:
        A dict mapping each word to its number of occurrences, or ``-1``
        when the file does not exist, is a directory, or cannot be accessed
        (an explanatory message is printed in those cases).
    """
    # A run of separators acts as a single delimiter, so consecutive
    # punctuation/whitespace does not produce a stream of empty fragments.
    split_pattern = r"[\s,.?!]+"

    # Exceptions might occur; report them with a friendly message instead of
    # a traceback. The ``with`` block closes the file even if read() fails.
    try:
        with open(filename, "r") as file:
            file_content = file.read()
    except FileNotFoundError:
        print("Error in WordFrequency(): The file you've requested ({0}) wasn't found.".format(filename))
        return -1
    except IsADirectoryError:
        print("Error in WordFrequency(): ({0}) isn't a file , it's a directory.".format(filename))
        return -1
    except PermissionError:
        print("Error in WordFrequency(): Encountered a permission error for file ({0}).".format(filename))
        return -1

    # No explicit trailing-whitespace strip is needed: whitespace is a
    # delimiter, and the empty fragments left at either end are dropped here.
    # (str.rstrip() takes a *set of characters*, not escape sequences, so
    # stripping with r"\r\t\n\s" would eat trailing letters r, t, n and s.)
    words = [word for word in re.split(split_pattern, file_content) if word]

    dictionary = {}
    for word in words:
        # When case does not matter, keys are stored in lower case.
        key = word if case_sensitive else word.lower()
        dictionary[key] = dictionary.get(key, 0) + 1
    return dictionary
def WordReducer(dictionary_1, dictionary_2):
    """Merge two word-count dictionaries into a new one.

    Counts for words present in both inputs are added together; words found
    in only one input keep their count. Neither input is modified.

    Args:
        dictionary_1: First mapping of word -> count.
        dictionary_2: Second mapping of word -> count.

    Returns:
        A new dict holding the combined counts, with the keys of
        ``dictionary_1`` first, followed by keys seen only in
        ``dictionary_2``.
    """
    # Start from a shallow copy so the caller's first dictionary is untouched.
    merged = dict(dictionary_1)
    for key, count in dictionary_2.items():
        existing = merged.get(key)
        merged[key] = count if existing is None else existing + count
    return merged
def PrettyPrintDictionary(dictionary, key_label, value_label):
    """Print a dictionary as a two-column table on standard output.

    The table is preceded and followed by two blank lines. Each column is
    centred in a 20-character field and the columns are separated by ``|``.

    Args:
        dictionary: Mapping whose keys and values are printed row by row, in
            iteration order.
        key_label: Heading for the key column.
        value_label: Heading for the value column.
    """
    format_row = "{:^20}|{:^20}".format

    # print("\n") writes the newline plus print's own line ending, i.e. the
    # same two blank lines as two bare print() calls.
    print("\n")
    print(format_row(key_label, value_label))
    print("_______________________________________")
    for key, value in dictionary.items():
        print(format_row(key, value))
    print("\n")
def main():
    """Count word frequencies across several files and print the totals.

    File names are taken from the command-line arguments when any are given;
    otherwise the user is prompted for a whitespace-separated list. At least
    two file names are required, and the program exits when fewer are
    supplied. Files that WordFrequency() rejects (it returns -1) are reported
    and skipped. Counting is case-insensitive.
    """
    if len(sys.argv) == 1:
        print("Welcome to multi-files words frequencies counter.")
        answer = input("To start , please enter files names separated by spaces:\n")
        # str.split() with no argument splits on runs of whitespace and
        # never yields empty strings.
        files = answer.split()
        if len(files) < 2:
            print("At least 2 files must be supplied as input")
            sys.exit()
    elif len(sys.argv) == 2:
        print("At least 2 files must be supplied in terminal arguments")
        sys.exit()
    else:
        files = sys.argv[1:]

    dictionaries = []
    for file in files:
        dictionary = WordFrequency(file, False)
        if dictionary == -1:
            print("Ignoring file ({0}).\n".format(file))
        else:
            dictionaries.append(dictionary)

    # Every file may have been rejected; without this guard the indexing
    # below would fail with an IndexError traceback.
    if not dictionaries:
        print("None of the supplied files could be read, nothing to count.")
        sys.exit(1)

    result = dictionaries[0]
    for dictionary in dictionaries[1:]:
        result = WordReducer(result, dictionary)
    PrettyPrintDictionary(result, "Word", "Frequency")
# Script entry point: runs the counter as soon as this file is executed.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment