Skip to content

Instantly share code, notes, and snippets.

@Ojha-Shashikant
Created November 30, 2018 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ojha-Shashikant/4bff175e23c449978667b273657e81dc to your computer and use it in GitHub Desktop.
Save Ojha-Shashikant/4bff175e23c449978667b273657e81dc to your computer and use it in GitHub Desktop.
Reads the words in a text file and prints the 10 most frequently occurring words in descending order, followed by a list of the words that are not repeated.
'''Read the words in a text file, print the 10 most frequent words in descending order, then print the list of non-repeated words.'''
import re
from collections import Counter
def file():
    """Return the location of the text file to analyse.

    Returns:
        tuple: ``(file_path, file_name)``. ``file_path`` is the directory,
        already terminated by a path separator so the two parts can be
        concatenated directly; ``file_name`` is the file inside it.
    """
    file_path = "C:\\Users\\Ojha\\Documents\\Python exercises\\Python Scripts\\"
    file_name = "Paragraph.txt"
    return file_path, file_name
def reading_file(file_path, file_name):
    """Read a text file and return the words it contains, lower-cased.

    Args:
        file_path: Directory prefix. It is concatenated directly with
            ``file_name``, so it must end with a path separator.
        file_name: Name of the file to read.

    Returns:
        list: Every maximal run of characters matching ``[a-zA-Z]``
        (case-insensitively), in order of appearance, taken from the
        lower-cased file contents. Digits, punctuation and whitespace act
        as separators, so ``"it's"`` yields ``"it"`` and ``"s"``.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # Open read-only: nothing is written back, and 'r+' would needlessly
    # fail on files the user may read but not modify.
    with open(file_path + file_name, 'r') as handle:
        text = handle.read().lower()
    return re.findall(r"[a-zA-Z]+", text, re.I)
def word_occurrence_calculator(word_list):
    """Count how many times each word appears in ``word_list``.

    Args:
        word_list: Iterable of hashable words (strings in this script).

    Returns:
        dict: Maps each distinct word to its number of occurrences. An
        empty input gives an empty dict.
    """
    # Counter tallies everything in a single pass instead of rescanning the
    # whole list once per distinct word; convert back to a plain dict so
    # callers keep receiving the same type as before.
    return dict(Counter(word_list))
def occurrence_sorting(word_occurrence):
    """Print the ten most frequent words and the words that occur only once.

    Args:
        word_occurrence: Mapping of word -> occurrence count.

    Returns:
        None. The results are written to standard output: first the header
        and up to ten ``(word, count)`` tuples ordered by descending count,
        then the alphabetically sorted list of words whose count is 1.
    """
    ranked = sorted(word_occurrence.items(), key=lambda kv: kv[1], reverse=True)
    print("Top10 most occurred words are: ")
    # Slice rather than index 0..9 so that inputs with fewer than ten
    # distinct words do not raise IndexError.
    for entry in ranked[:10]:
        print(entry)
    # A word is "non repeated" only when it was seen exactly once.
    sorted_non_repeated = sorted(
        word for word, count in word_occurrence.items() if count == 1
    )
    print("Non repeated word list: ", sorted_non_repeated)
# main starts here
# Run the pipeline only when executed as a script: locate the file, read its
# words, count them, then print the summary.
if __name__ == '__main__':
    path, name = file()
    word_list = reading_file(path, name)
    word_occurrence = word_occurrence_calculator(word_list)
    occurrence_sorting(word_occurrence)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment