Created
March 20, 2018 21:58
-
-
Save alexritter96/9be9c03009fb344fe54d43b0eaef644c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from nltk.corpus import cmudict | |
from nltk.tokenize import RegexpTokenizer | |
from nltk.tokenize import sent_tokenize | |
# Shared NLTK resources, built once at import time:
# - `d` maps a lowercase word to its list of cmudict pronunciations.
# - `tokenizer` extracts word tokens (alphanumeric runs, no punctuation).
d = cmudict.dict()
tokenizer = RegexpTokenizer(r'\w+')
class Readability:
    """Compute common English readability metrics for a piece of text.

    Provides Flesch Reading Ease / Flesch-Kincaid grade, Gunning fog,
    SMOG, and the Automated Readability Index (ARI). Syllable counts
    come from NLTK's cmudict; words absent from the dictionary are
    skipped by the syllable-based metrics.
    """

    def __init__(self, txt):
        # Tokenize once up front; all metrics reuse these lists.
        self.sents = sent_tokenize(txt)
        self.words = tokenizer.tokenize(txt)
        self.txt = txt

    def char(self):
        """Return the total character count across all word tokens
        (the tokenizer already excludes punctuation and whitespace)."""
        return sum(len(word) for word in self.words)

    def sent_count(self):
        """Return the number of sentences in the text."""
        return len(self.sents)

    def word_count(self):
        """Return the number of word tokens in the text."""
        return len(self.words)

    def syl(self, word):
        """Return the syllable count of *word*, or None if it is not
        in cmudict (e.g. proper nouns, typos).

        In cmudict a phone ends with a digit exactly when it is a
        vowel, so counting digit-terminated phones counts syllables.
        Only the first listed pronunciation is used.
        """
        try:
            pronunciations = d[word.lower()]
        except KeyError:
            return None
        return sum(1 for phone in pronunciations[0] if phone[-1].isdigit())

    def list_to_word(self):
        """Return per-token syllable counts for the whole text.

        Entries are None for out-of-vocabulary words; consumers filter
        those out before doing arithmetic.
        """
        return [self.syl(w) for w in self.words]

    def poly_syl(self):
        """Return the syllable counts of polysyllabic (>= 3 syllable)
        words only.

        Fix: None entries (unknown words) must be skipped — comparing
        None >= 3 raises TypeError in Python 3.
        """
        return [s for s in self.list_to_word() if s is not None and s >= 3]

    def flesch_kincaid(self, ease=False):
        """Return a Flesch readability score.

        With ease=True, return the Flesch Reading Ease score (higher
        means easier to read); otherwise return the Flesch-Kincaid
        grade level (higher means harder).

        Raises ZeroDivisionError if the text has no words/sentences.
        """
        # Fix: skip None entries so sum() cannot raise TypeError on
        # out-of-vocabulary words.
        total_syllables = sum(s for s in self.list_to_word() if s is not None)
        words_per_sentence = self.word_count() / self.sent_count()
        syllables_per_word = total_syllables / self.word_count()
        if ease:
            return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word
        return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59

    def gunning_fog(self):
        """Return the Gunning fog index:
        0.4 * (words per sentence + 100 * complex-word ratio),
        where a "complex" word has 3 or more syllables.
        """
        complex_words = len(self.poly_syl())
        words_per_sentence = self.word_count() / self.sent_count()
        return 0.4 * (words_per_sentence + 100 * complex_words / self.word_count())

    def smog_index(self):
        """Return the SMOG grade (McLaughlin, 1969).

        SMOG = 1.0430 * sqrt(polysyllables * (30 / sentences)) + 3.1291.
        For accuracy the text should contain at least 30 sentences.
        """
        # Fix: the published formula normalizes by SENTENCE count
        # (30 / sentences), not by word count.
        scaled = len(self.poly_syl()) * (30 / self.sent_count())
        return 1.0430 * math.sqrt(scaled) + 3.1291

    def ari(self):
        """Return the Automated Readability Index:
        4.71 * (chars per word) + 0.5 * (words per sentence) - 21.43.
        """
        return (4.71 * (self.char() / self.word_count())
                + 0.5 * (self.word_count() / self.sent_count())
                - 21.43)
# Fix: guard the demo so importing this module does not trigger the
# tokenization and printing side effects.
if __name__ == "__main__":
    # Quick smoke test on a single short sentence. Note that SMOG is
    # only meaningful for texts of 30+ sentences.
    r = Readability('This is a fucking test')
    print(r.char())
    print(r.sent_count())
    print(r.word_count())
    print(r.flesch_kincaid())
    print(r.gunning_fog())
    print(r.smog_index())
    print(r.ari())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment