Skip to content

Instantly share code, notes, and snippets.

@AO8
Last active November 17, 2023 18:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AO8/78ef34cf37b103f2c41d8d34474dbcb8 to your computer and use it in GitHub Desktop.
Save AO8/78ef34cf37b103f2c41d8d34474dbcb8 to your computer and use it in GitHub Desktop.
Python text analyzer using TextBlob and Textatistic.
import nltk
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
from textatistic import Textatistic
# This corpus is required for the Naive Bayes Analyzer.
# The call sits at module top level, so it runs as soon as the file is
# imported or executed, before main() is reached.
nltk.download("movie_reviews")
def main():
    """Prompt for text, then print sentiment, basic stats, and readability.

    The three report sections are separated by a blank line.
    """
    user_text = get_user_text()
    # convert_to_blob() returns the sentiment result, not the TextBlob itself.
    sentiment = convert_to_blob(user_text)
    texta_dict = convert_to_textatistic_dict(user_text)

    print()
    display_sentiment(sentiment)
    print()
    display_basic_stats(texta_dict)
    print()
    display_readability(texta_dict)
def get_user_text():
    """Prompt on stdout and return one line of text read from stdin.

    Returns:
        The entered line without its trailing newline.

    NOTE(review): input() stops at the first newline, so a multi-paragraph
    paste is only captured up to its first line break.
    """
    return input("Copy and paste the text you would like to analyze below:\n")
def convert_to_blob(text):
    """Analyze *text* with TextBlob's NaiveBayesAnalyzer and return the result.

    Despite the function name, the return value is the blob's ``sentiment``
    attribute, not the TextBlob object. display_sentiment() reads ``p_pos``,
    ``p_neg`` and ``classification`` from it.
    """
    blob = TextBlob(text, analyzer=NaiveBayesAnalyzer())
    return blob.sentiment
def convert_to_textatistic_dict(text):
    """Return the Textatistic statistics for *text* as a dictionary.

    The display_* functions read counts and readability scores from this
    dictionary by key (for example ``'word_count'`` and ``'smog_score'``).
    """
    stats = Textatistic(text)
    return stats.dict()
def display_sentiment(blob):
    """Print the Naive Bayes Analyzer sentiment summary.

    Shows the text's positive percentage, negative percentage, and its
    overall classification, where 'pos' is positive and 'neg' is negative.

    Args:
        blob: Sentiment result exposing ``p_pos``, ``p_neg`` and
            ``classification`` attributes (what convert_to_blob() returns).
    """
    print("Sentiment Summary:\n".upper())
    # The probabilities are multiplied by 100 to display them as percentages.
    print(f"Positive percentage: {round(blob.p_pos * 100, 2)}%")
    print(f"Negative percentage: {round(blob.p_neg * 100, 2)}%")
    print(f"Overall sentiment classification: {blob.classification}")
def display_basic_stats(texta_dict):
    """Print basic counts pulled from a Textatistic dictionary.

    Displays character count, word count, sentence count, and the number
    of polysyllable words.

    Args:
        texta_dict: Mapping with the keys ``'char_count'``, ``'word_count'``,
            ``'sent_count'`` and ``'polysyblword_count'``.

    Raises:
        KeyError: If any of those keys is missing.
    """
    print("Basic Stats:\n".upper())
    print(f"Character count: {texta_dict['char_count']}")
    print(f"Word count: {texta_dict['word_count']}")
    print(f"Sentence count: {texta_dict['sent_count']}")
    print(f"Polysyllable words used: {texta_dict['polysyblword_count']}")
def display_readability(texta_dict):
    """Print readability scores from a Textatistic dictionary, rounded to 0.1.

    Flesch Kincaid score corresponds to a specific grade level.
    Gunning Fog score corresponds to a specific grade level.
    SMOG score corresponds to the years of education required to understand
    a text.
    Dale-Chall score maps to grade levels from 4 and below to college grads
    (grade 16) and above.

    Args:
        texta_dict: Mapping with the keys ``'fleschkincaid_score'``,
            ``'gunningfog_score'``, ``'smog_score'`` and ``'dalechall_score'``.

    Raises:
        KeyError: If any of those keys is missing.
    """
    print("Readability Summary:\n".upper())
    print(f"Flesch Kincaid score: {round(texta_dict['fleschkincaid_score'], 1)}")
    print(f"Gunning Fog score: {round(texta_dict['gunningfog_score'], 1)}")
    print(f"Simple Measure of Gobbledygook score: {round(texta_dict['smog_score'], 1)}")
    print(f"Dale-Chall score: {round(texta_dict['dalechall_score'], 1)}")
# Run the analyzer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment