Skip to content

Instantly share code, notes, and snippets.

@Eyad-Bereh
Created March 13, 2019 22:51
Show Gist options
  • Save Eyad-Bereh/02a85ab7f0de86ce88959f9f456852fa to your computer and use it in GitHub Desktop.
Save Eyad-Bereh/02a85ab7f0de86ce88959f9f456852fa to your computer and use it in GitHub Desktop.
A multi-file word-frequency counting program written in Python 3.6.8; I had this as a homework assignment.
import re
import sys
def WordFrequency(filename, case_sensitive = True):
    """Count how often each word occurs in a text file.

    The file content is split on whitespace and on the punctuation
    characters ``,``, ``.``, ``?`` and ``!``; empty fragments produced by
    adjacent separators are discarded.

    Args:
        filename: Path of the file to read.
        case_sensitive: When True (the default) words are counted exactly
            as written. When False every word is lower-cased first, so
            "Dog" and "dog" share a single entry.

    Returns:
        A dict mapping each word to its number of occurrences, or -1 when
        the file does not exist, is a directory, or cannot be accessed
        (an explanatory message is printed in those cases).
    """
    split_pattern = r"\t|\r|\n|\s+|\,|\.|\?|\!"
    # Exceptions might occur, we don't want to print the native ugly error messages
    try:
        # The context manager closes the file even if read() fails.
        with open(filename, "r") as file:
            file_content = file.read()
    except FileNotFoundError:
        print("Error in WordFrequency(): The file you've requested ({0}) wasn't found.".format(filename))
        return -1
    except IsADirectoryError:
        print("Error in WordFrequency(): ({0}) isn't a file , it's a directory.".format(filename))
        return -1
    except PermissionError:
        print("Error in WordFrequency(): Encountered a permission error for file ({0}).".format(filename))
        return -1

    # No trailing-whitespace strip is needed: whitespace is a separator and
    # the empty strings it leaves behind are dropped here. (The previous
    # rstrip(r"\r\t\n\s") removed the literal letters r, t, n and s from the
    # end of the text, truncating the last word.)
    words = [word for word in re.split(split_pattern, file_content) if word]

    dictionary = {}
    for word in words:
        # If we don't really care about cases of keys, store them in lower case
        key = word if case_sensitive else word.lower()
        dictionary[key] = dictionary.get(key, 0) + 1
    return dictionary
def WordReducer(dictionary_1, dictionary_2):
    """Merge two word-frequency dictionaries by summing the counts per word.

    Args:
        dictionary_1: Mapping of word -> count.
        dictionary_2: Mapping of word -> count.

    Returns:
        A new dict containing every word from both inputs; a word present
        in both maps to the sum of its two counts. Neither input is
        modified.
    """
    # Start from a shallow copy so the caller's dictionary is left untouched.
    dictionary = dict(dictionary_1)
    for word, count in dictionary_2.items():
        dictionary[word] = dictionary.get(word, 0) + count
    return dictionary
def PrettyPrintDictionary(dictionary, key_label, value_label):
    """Print a dictionary to standard output as a two-column table.

    Args:
        dictionary: Mapping whose keys and values are printed, one entry
            per row, in the dictionary's iteration order.
        key_label: Heading shown above the key column.
        value_label: Heading shown above the value column.

    The table is preceded and followed by two blank lines; each cell is
    centred in a 20-character column.
    """
    row_format = "{:^20}|{:^20}"
    blank_lines = "\n"  # print() appends one more newline -> two blank lines

    print(blank_lines)
    print(row_format.format(key_label, value_label))
    print("_______________________________________")
    for key, value in dictionary.items():
        print(row_format.format(key, value))
    print(blank_lines)
def main():
    """Count word frequencies across several files and print the combined table.

    File names are taken from the command-line arguments; when none are
    given the user is prompted to type them, separated by whitespace. At
    least two file names are required, otherwise a message is printed and
    the program exits.

    Each file is processed case-insensitively with WordFrequency(); files
    that cannot be read are reported and skipped. The per-file counts are
    merged with WordReducer() and shown with PrettyPrintDictionary().
    """
    if len(sys.argv) == 1:
        print("Welcome to multi-files words frequencies counter.")
        answer = input("To start , please enter files names separated by spaces:\n")
        # str.split() with no argument splits on whitespace runs and never
        # yields empty strings.
        files = answer.split()
        if len(files) < 2:
            print("At least 2 files must be supplied as input")
            sys.exit()
    elif len(sys.argv) == 2:
        print("At least 2 files must be supplied in terminal arguments")
        sys.exit()
    else:
        files = sys.argv[1:]

    dictionaries = []
    for file in files:
        dictionary = WordFrequency(file, False)
        if dictionary == -1:  # WordFrequency signals a read failure with -1
            print("Ignoring file ({0}).\n".format(file))
        else:
            dictionaries.append(dictionary)

    # Every file may have been skipped; indexing dictionaries[0] would then
    # raise IndexError, so stop with a message instead.
    if not dictionaries:
        print("None of the supplied files could be read.")
        sys.exit()

    result = dictionaries[0]
    for dictionary in dictionaries[1:]:
        result = WordReducer(result, dictionary)
    PrettyPrintDictionary(result, "Word", "Frequency")
# Run the counter only when executed as a script, so importing this module
# (e.g. to reuse WordFrequency) does not prompt for input or exit.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment