Skip to content

Instantly share code, notes, and snippets.

@WhiteBlackGoose
Last active December 25, 2018 14:05
Show Gist options
  • Save WhiteBlackGoose/50ec80a27ed983431f0b23591816f0c5 to your computer and use it in GitHub Desktop.
Save WhiteBlackGoose/50ec80a27ed983431f0b23591816f0c5 to your computer and use it in GitHub Desktop.
computer_ling
from nltk import word_tokenize, sent_tokenize
# Corpus statistics for a UTF-8 text file:
#   a) the ratio of three-letter words to one-letter words;
#   b) the sentence that contains the word "в" the most times.
PATH = "text_for_test.txt"

# Read the whole text up front; the context manager closes the handle
# even if decoding fails part-way through.
with open(PATH, encoding="utf8") as source:
    text = source.read()

# word_tokenize emits punctuation as separate tokens (",", ".", "..."),
# so only purely alphabetic tokens are counted as words here; otherwise
# commas and periods would dominate the one-letter count.
words3, words1 = 0, 0
for word in word_tokenize(text):
    if not word.isalpha():
        continue
    if len(word) == 3:
        words3 += 1
    elif len(word) == 1:
        words1 += 1

maxsent = ''
maxlt = 0
for sent in sent_tokenize(text):
    # Count whole-word tokens instead of the substring " в ": the substring
    # form misses a sentence-initial "В", a "в" next to punctuation or a
    # newline, and back-to-back occurrences that share a space.
    hits = word_tokenize(sent.lower()).count("в")
    # Strict comparison keeps the first sentence on ties.
    if hits > maxlt:
        maxlt = hits
        maxsent = sent

# The ratio is undefined when the text has no one-letter words; report
# that instead of crashing with ZeroDivisionError.
if words1:
    print("a)", words3 / words1)
else:
    print("a)", "n/a (no one-letter words found)")
print("b)", maxsent, "(" + str(maxlt) + ")")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment