Created
November 30, 2018 14:36
-
-
Save Ojha-Shashikant/4bff175e23c449978667b273657e81dc to your computer and use it in GitHub Desktop.
Reads words from a text file, prints the top 10 most frequently occurring words in descending order, and also prints the list of non-repeated words.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Read words from a file, print the top 10 most frequently occurring words in descending order, and print the list of non-repeated words.'''
import re
from collections import Counter
def file(
    file_path="C:\\Users\\Ojha\\Documents\\Python exercises\\Python Scripts\\",
    file_name="Paragraph.txt",
):
    """Return the location of the text file to analyse.

    Args:
        file_path: Directory prefix, including its trailing separator.
            Defaults to the path the script originally hard-coded.
        file_name: Name of the file inside ``file_path``. Defaults to the
            originally hard-coded ``"Paragraph.txt"``.

    Returns:
        A ``(file_path, file_name)`` tuple. Calling ``file()`` with no
        arguments yields the same pair as before.
    """
    return file_path, file_name
def reading_file(file_path, file_name):
    """Read a text file and return its words, lowercased.

    Args:
        file_path: Directory prefix; it is concatenated directly with
            ``file_name``, so it must already end with a path separator.
        file_name: Name of the file to read.

    Returns:
        A list of the alphabetic runs found in the file, in order of
        appearance, all lowercased. Digits and punctuation act as
        separators and are dropped.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # Open read-only: the file is never written, and 'r+' would fail on
    # files the user may read but not modify.
    with open(file_path + file_name, 'r') as handle:
        text = handle.read()
    text = text.lower()
    return re.findall(r"[a-zA-Z]+", text, re.I)
def word_occurrence_calculator(word_list):
    """Count how many times each word appears in ``word_list``.

    Args:
        word_list: Iterable of words (hashable values) to tally.

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences. An empty input yields an empty dict.
    """
    # Counter tallies in a single pass; the previous implementation
    # rescanned the whole list once for every distinct word.
    return dict(Counter(word_list))
def occurrence_sorting(word_occurrence):
    """Print the most frequent words and the words that occur exactly once.

    Args:
        word_occurrence: Mapping of word -> occurrence count.

    Prints:
        A header line, then up to ten ``(word, count)`` tuples in descending
        order of count (ties keep the mapping's iteration order), then the
        alphabetically sorted list of words whose count is 1.
    """
    ranked = sorted(word_occurrence.items(), key=lambda kv: kv[1], reverse=True)
    print("Top10 most occurred words are: ")
    # Slice instead of indexing range(10) so inputs with fewer than ten
    # distinct words do not raise IndexError.
    for entry in ranked[:10]:
        print(entry)
    # Only words seen exactly once are "non repeated"; previously every
    # word was listed because the count was ignored.
    sorted_non_repeated = sorted(
        word for word, count in word_occurrence.items() if count == 1
    )
    print("Non repeated word list: ", sorted_non_repeated)
# Script entry point: locate the input file, split it into words, count
# each word, then print the report.
if __name__ == '__main__':  # '==' compares; the original '=' was a syntax error
    path, name = file()
    word_list = reading_file(path, name)
    word_occurrence = word_occurrence_calculator(word_list)
    occurrence_sorting(word_occurrence)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment