# Source: GitHub Gist by @alexritter96
# (gist id alexritter96/9be9c03009fb344fe54d43b0eaef644c),
# created March 20, 2018 21:58.
import math
from nltk.corpus import cmudict
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import sent_tokenize
# Pronunciation lookup returned by NLTK's cmudict corpus reader; Readability.syl
# indexes it by lower-cased word and reads the first pronunciation's phonemes.
# NOTE(review): presumably requires the cmudict corpus to be installed locally
# (nltk.download('cmudict')) -- confirm for the deployment environment.
d = cmudict.dict()
# Word tokenizer: every maximal run of \w characters becomes one token, so
# punctuation is dropped from the word list.
tokenizer = RegexpTokenizer(r'\w+')
class Readability:
    """Compute classic readability scores for a piece of text.

    The text is split into sentences with ``sent_tokenize`` and into word
    tokens with the module-level ``tokenizer``; every score is derived from
    those two lists plus per-word syllable counts.

    Attributes:
        sents: list of sentence strings.
        words: list of word tokens.
        txt: the original, unmodified text.

    Empty text (no words and/or no sentences) does not raise: any ratio whose
    denominator is zero is treated as 0.0, so each score degrades to its
    formula's constant term.
    """

    def __init__(self, txt):
        self.sents = sent_tokenize(txt)
        self.words = tokenizer.tokenize(txt)
        self.txt = txt

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    @staticmethod
    def _estimate_syllables(word):
        """Estimate syllables by counting groups of consecutive vowels.

        Used only as a fallback for tokens the pronunciation dictionary does
        not know (names, numbers, typos). Always returns at least 1.
        """
        groups = 0
        previous_was_vowel = False
        for letter in word.lower():
            is_vowel = letter in "aeiouy"
            if is_vowel and not previous_was_vowel:
                groups += 1
            previous_was_vowel = is_vowel
        return max(groups, 1)

    def char(self):
        """Return the total number of characters across all word tokens."""
        return sum(len(word) for word in self.words)

    def sent_count(self):
        """Return the number of sentences."""
        return len(self.sents)

    def word_count(self):
        """Return the number of word tokens."""
        return len(self.words)

    def syl(self, word):
        """Return the dictionary syllable count for ``word``.

        The count is the number of phonemes ending in a digit in the first
        listed pronunciation (CMUdict marks vowel phonemes with a stress
        digit). Returns ``None`` when the word is not in the dictionary.
        """
        try:
            pronunciations = d[word.lower()]
        except KeyError:
            return None
        return sum(1 for phoneme in pronunciations[0] if phoneme[-1].isdigit())

    def list_to_word(self):
        """Return a list with the syllable count of every word token.

        Tokens missing from the dictionary get a vowel-group estimate instead
        of ``None``, so the result is always a list of ints that can safely be
        summed and compared.
        """
        counts = []
        for word in self.words:
            known = self.syl(word)
            if known is None:
                known = self._estimate_syllables(word)
            counts.append(known)
        return counts

    def poly_syl(self):
        """Return the syllable counts of all words with three or more syllables."""
        return [count for count in self.list_to_word() if count >= 3]

    def flesch_kincaid(self, ease=False):
        """Return a Flesch score for the text.

        Args:
            ease: when true, return the Flesch Reading Ease score (higher
                means easier to read); otherwise return the Flesch-Kincaid
                grade level (higher means harder to read).
        """
        words = self.word_count()
        words_per_sentence = self._ratio(words, self.sent_count())
        syllables_per_word = self._ratio(sum(self.list_to_word()), words)
        if ease:
            return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word
        return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59

    def gunning_fog(self):
        """Return the Gunning fog index of the text."""
        words = self.word_count()
        words_per_sentence = self._ratio(words, self.sent_count())
        complex_word_ratio = self._ratio(len(self.poly_syl()), words)
        return 0.4 * (words_per_sentence + 100 * complex_word_ratio)

    def smog_index(self):
        """Return the SMOG grade of the text.

        The polysyllable count is normalised to a 30-sentence sample, so it
        is scaled by 30 / number of sentences (not number of words). For
        accuracy the text should contain at least 30 sentences.
        """
        scaled = len(self.poly_syl()) * self._ratio(30, self.sent_count())
        return 1.0430 * math.sqrt(scaled) + 3.1291

    def ari(self):
        """Return the Automated Readability Index of the text."""
        words = self.word_count()
        chars_per_word = self._ratio(self.char(), words)
        words_per_sentence = self._ratio(words, self.sent_count())
        return 4.71 * chars_per_word + 0.5 * words_per_sentence - 21.43
# Demo: build a Readability instance for a short sample and print each
# statistic on its own line, in a fixed order.
r = Readability('This is a fucking test')
for metric in (
    r.char,
    r.sent_count,
    r.word_count,
    r.flesch_kincaid,
    r.gunning_fog,
    r.smog_index,
    r.ari,
):
    print(metric())
# (End of gist; GitHub page footer removed.)