Created
March 20, 2018 21:58
-
-
Save alexritter96/9be9c03009fb344fe54d43b0eaef644c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from nltk.corpus import cmudict | |
from nltk.tokenize import RegexpTokenizer | |
from nltk.tokenize import sent_tokenize | |
# Shared NLTK resources, built once at import time:
# - `d` maps a lowercase word to its list of cmudict pronunciations.
# - `tokenizer` extracts word tokens (alphanumeric runs, no punctuation).
d = cmudict.dict()
tokenizer = RegexpTokenizer(r'\w+')
class Readability:
    """Compute common English readability metrics for a piece of text.

    Provides Flesch Reading Ease / Flesch-Kincaid grade, Gunning fog,
    SMOG, and the Automated Readability Index (ARI). Syllable counts
    come from NLTK's cmudict; words absent from the dictionary are
    skipped by the syllable-based metrics.
    """

    def __init__(self, txt):
        # Tokenize once up front; all metrics reuse these lists.
        self.sents = sent_tokenize(txt)
        self.words = tokenizer.tokenize(txt)
        self.txt = txt

    def char(self):
        """Return the total character count across all word tokens
        (the tokenizer already excludes punctuation and whitespace)."""
        return sum(len(word) for word in self.words)

    def sent_count(self):
        """Return the number of sentences in the text."""
        return len(self.sents)

    def word_count(self):
        """Return the number of word tokens in the text."""
        return len(self.words)

    def syl(self, word):
        """Return the syllable count of *word*, or None if it is not
        in cmudict (e.g. proper nouns, typos).

        In cmudict a phone ends with a digit exactly when it is a
        vowel, so counting digit-terminated phones counts syllables.
        Only the first listed pronunciation is used.
        """
        try:
            pronunciations = d[word.lower()]
        except KeyError:
            return None
        return sum(1 for phone in pronunciations[0] if phone[-1].isdigit())

    def list_to_word(self):
        """Return per-token syllable counts for the whole text.

        Entries are None for out-of-vocabulary words; consumers filter
        those out before doing arithmetic.
        """
        return [self.syl(w) for w in self.words]

    def poly_syl(self):
        """Return the syllable counts of polysyllabic (>= 3 syllable)
        words only.

        Fix: None entries (unknown words) must be skipped — comparing
        None >= 3 raises TypeError in Python 3.
        """
        return [s for s in self.list_to_word() if s is not None and s >= 3]

    def flesch_kincaid(self, ease=False):
        """Return a Flesch readability score.

        With ease=True, return the Flesch Reading Ease score (higher
        means easier to read); otherwise return the Flesch-Kincaid
        grade level (higher means harder).

        Raises ZeroDivisionError if the text has no words/sentences.
        """
        # Fix: skip None entries so sum() cannot raise TypeError on
        # out-of-vocabulary words.
        total_syllables = sum(s for s in self.list_to_word() if s is not None)
        words_per_sentence = self.word_count() / self.sent_count()
        syllables_per_word = total_syllables / self.word_count()
        if ease:
            return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word
        return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59

    def gunning_fog(self):
        """Return the Gunning fog index:
        0.4 * (words per sentence + 100 * complex-word ratio),
        where a "complex" word has 3 or more syllables.
        """
        complex_words = len(self.poly_syl())
        words_per_sentence = self.word_count() / self.sent_count()
        return 0.4 * (words_per_sentence + 100 * complex_words / self.word_count())

    def smog_index(self):
        """Return the SMOG grade (McLaughlin, 1969).

        SMOG = 1.0430 * sqrt(polysyllables * (30 / sentences)) + 3.1291.
        For accuracy the text should contain at least 30 sentences.
        """
        # Fix: the published formula normalizes by SENTENCE count
        # (30 / sentences), not by word count.
        scaled = len(self.poly_syl()) * (30 / self.sent_count())
        return 1.0430 * math.sqrt(scaled) + 3.1291

    def ari(self):
        """Return the Automated Readability Index:
        4.71 * (chars per word) + 0.5 * (words per sentence) - 21.43.
        """
        return (4.71 * (self.char() / self.word_count())
                + 0.5 * (self.word_count() / self.sent_count())
                - 21.43)
# Fix: guard the demo so importing this module does not trigger the
# tokenization and printing side effects.
if __name__ == "__main__":
    # Quick smoke test on a single short sentence. Note that SMOG is
    # only meaningful for texts of 30+ sentences.
    r = Readability('This is a fucking test')
    print(r.char())
    print(r.sent_count())
    print(r.word_count())
    print(r.flesch_kincaid())
    print(r.gunning_fog())
    print(r.smog_index())
    print(r.ari())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment