Skip to content

Instantly share code, notes, and snippets.

@FloydanTheBeast
Created December 21, 2018 14:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FloydanTheBeast/d78033ce1fd359bfde6ea1d7047cdf5d to your computer and use it in GitHub Desktop.
Save FloydanTheBeast/d78033ce1fd359bfde6ea1d7047cdf5d to your computer and use it in GitHub Desktop.
import operator
import unicodedata
from collections import defaultdict
from string import punctuation

from nltk import sent_tokenize, word_tokenize
def count_one_symbol_words(word_list):
    """Count the one-character word tokens in ``word_list``.

    A token is counted when it is exactly one character long and that
    character is not punctuation.  Besides the ASCII set in
    ``string.punctuation``, any character whose Unicode general category
    starts with ``P`` (dashes, guillemets, ellipsis, ...) is treated as
    punctuation, so typographic marks are not mistaken for one-letter words.

    Args:
        word_list: Iterable of string tokens.

    Returns:
        The number of one-character, non-punctuation tokens.
    """
    return sum(
        1
        for word in word_list
        if len(word) == 1
        and word not in punctuation
        and not unicodedata.category(word).startswith('P')
    )
def _is_word(token):
    """Return True if ``token`` has at least one non-punctuation character."""
    # Checked per character: a plain ``token not in punctuation`` is a
    # substring test and lets multi-character tokens such as '...' through.
    return any(
        ch not in punctuation and not unicodedata.category(ch).startswith('P')
        for ch in token
    )


# Read-only access is enough; the context manager closes the file.
# The encoding is left at the platform default.
with open('text_for_test.txt', 'r') as text_file:
    text = text_file.read()

sents = sent_tokenize(text)

word_counter = 0                 # word tokens (punctuation excluded) in the text
most_one_symbols = None          # sentence with the most one-character words
most_one_symbols_counter = 0     # number of one-character words in it
word_counts = defaultdict(int)   # lower-cased word -> number of occurrences

for sent in sents:
    word_list = word_tokenize(sent)
    words = [token for token in word_list if _is_word(token)]
    word_counter += len(words)

    one_symbol_counter = count_one_symbol_words(word_list)
    # Strict '>' keeps the first sentence when several share the maximum.
    if one_symbol_counter > most_one_symbols_counter:
        most_one_symbols_counter = one_symbol_counter
        most_one_symbols = sent

    # Only real words go into the frequency table, so punctuation tokens
    # cannot show up among the most frequently used "words".
    for word in words:
        word_counts[word.lower()] += 1

# Relative frequency of each word among all word tokens.  When there are no
# words the table is empty, so no division by zero can happen here.
frequency_dict = {
    word: count / word_counter for word, count in word_counts.items()
}

# Average number of words per sentence; an empty text has no sentences.
average_sentence_length = round(word_counter / len(sents), 3) if sents else 0

if most_one_symbols is not None:
    print('Наибольшее количество односимвольных слов в предложении:\n {}\n Их количество - {}'.format(most_one_symbols, most_one_symbols_counter))
else:
    print('В тексте нет ни одного предложения с односимвольным словом')
print('Средняя длина предложения - {}'.format(average_sentence_length))

# Stable sort: words with equal frequency keep first-seen order.
ranked_words = sorted(
    frequency_dict.items(), key=operator.itemgetter(1), reverse=True
)
print('10 самых частоиспользуемых слов:\n')
for word, frequency in ranked_words[:10]:
    print('{}: {}'.format(word, frequency))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment