Skip to content

Instantly share code, notes, and snippets.

@chad-m
Created July 31, 2020 22:29
Show Gist options
  • Save chad-m/7b36d62f2d01291ce37effd05d6af65e to your computer and use it in GitHub Desktop.
Save chad-m/7b36d62f2d01291ce37effd05d6af65e to your computer and use it in GitHub Desktop.
A Streamlit app to check readability of given content
import base64
import html
import json
import pickle
import re
import uuid

import pandas as pd
import streamlit as st
import textstat
# st.set_option('deprecation.showfileUploaderEncoding', False)
# Reference: https://github.com/shivam5992/textstat
def main():
    """Draw the sidebar menu and run the handler for the selected mode.

    The sidebar shows the app title, a 'Main Menu' select box and an
    'About' checkbox; ticking the checkbox adds a short description of the
    app to the sidebar.
    """
    # NOTE(review): indentation was reconstructed from a flattened paste; the
    # 'About' checkbox is assumed to sit at function level, not in the `else`.
    rule = '-' * 17

    # Sidebar
    st.sidebar.markdown('# Content Readability Checker :orange_book: :pencil2:\n' + rule)
    menu_options = ['Input Text', 'Upload File (CSV)']
    mode = st.sidebar.selectbox('Main Menu', menu_options)
    st.sidebar.markdown(rule)

    # Main
    if mode == 'Upload File (CSV)':
        load_upload_mode()
        st.sidebar.markdown(rule)
    elif mode == 'Input Text':
        load_input_text_mode()
    else:
        st.write(':sunglasses:')

    show_about = st.sidebar.checkbox('About')
    if show_about:
        st.sidebar.markdown("""
This app returns a set of readability scores for given content items, and might be useful
to content creators, marketing teams, among others.
The set of extracted scores includes: `syllable count`, `word count`, `sentence count`,
` Flesch Reading Ease formula`, `Flesch-Kincaid Grade Level`, `The Fog Scale (Gunning FOG Formula)`,
`The SMOG Index`, `Automated Readability Index`, `The Coleman-Liau Index`, `Linsear Write Formula`,
`Dale-Chall Readability Score`, and a `Readability Consensus` based upon all the above tests.""")
def load_input_text_mode():
    """Show a text area and display the readability scores of its contents."""
    # Sample sentence pre-filled in the text area.
    default_text = "It's time to book your mammogram appointment"
    entered_text = st.text_area('Enter Text', default_text, height=225)
    st.write(content_item_readability(entered_text))
def load_upload_mode():
    """Score one column of an uploaded CSV and offer the results for download."""
    # Upload file
    uploaded_file = st.sidebar.file_uploader("Load Data (Single Column CSV)", type="csv")
    if not uploaded_file:
        # Nothing has been uploaded yet, so there is nothing to score.
        return

    content_df = pd.read_csv(uploaded_file)
    column_names = list(content_df.columns)
    use_col = st.sidebar.selectbox('Choose column name to evaluate', column_names)

    scored_df = content_items_readability(content_df, use_col=use_col)
    st.write(scored_df)

    # The link is raw HTML, so it has to be rendered with unsafe_allow_html.
    results_filename = f'Content Items - {use_col} - Readability Results.csv'
    link_html = download_button(scored_df, results_filename, 'Download')
    st.markdown(link_html, unsafe_allow_html=True)
def content_item_readability(s):
    """Compute the textstat readability metrics for a single piece of text.

    Params:
    ------
    s (str): the text to evaluate.

    Returns:
    -------
    (dict): metric name -> value returned by the matching textstat function.

    Raises:
    ------
    NotImplementedError: if ``s`` is not a ``str``.
    """
    # Only plain strings are supported.
    if not isinstance(s, str):
        raise NotImplementedError

    return {
        # Readability
        'flesch_reading_ease': textstat.flesch_reading_ease(s),
        'smog_index': textstat.smog_index(s),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(s),
        'coleman_liau_index': textstat.coleman_liau_index(s),
        'automated_readability_index': textstat.automated_readability_index(s),
        'dale_chall_readability_score': textstat.dale_chall_readability_score(s),
        'difficult_words': textstat.difficult_words(s),
        'linsear_write_formula': textstat.linsear_write_formula(s),
        'gunning_fog': textstat.gunning_fog(s),
        'text_standard': textstat.text_standard(s),
        # Additionals
        'syllable_count': textstat.syllable_count(s),
        'word_count': textstat.lexicon_count(s),
        'sentence_count': textstat.sentence_count(s),
    }
def content_items_readability(df, use_col=None):
    """Add one readability-score column per textstat metric to ``df``.

    Params:
    ------
    df (pd.DataFrame): dataframe holding the content items to be evaluated.
        It is modified in place.
    use_col: label of the column to evaluate. When falsy (the default
        ``None``), the first column of ``df`` is used.

    Returns:
    -------
    (pd.DataFrame): the same ``df`` object, with the score columns added.
    """
    # Output column name -> textstat function applied to every content item.
    metrics = {
        'flesch_reading_ease': textstat.flesch_reading_ease,
        'smog_index': textstat.smog_index,
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade,
        'coleman_liau_index': textstat.coleman_liau_index,
        'automated_readability_index': textstat.automated_readability_index,
        'dale_chall_readability_score': textstat.dale_chall_readability_score,
        'difficult_words': textstat.difficult_words,
        'linsear_write_formula': textstat.linsear_write_formula,
        'gunning_fog': textstat.gunning_fog,
        'text_standard': textstat.text_standard,
        'syllable_count': textstat.syllable_count,
        'word_count': textstat.lexicon_count,
        # Previously written as 'sentence_count)' (stray parenthesis).
        'sentence_count': textstat.sentence_count,
    }

    # Select the source column once, before any score column is written, so
    # every metric is computed from the original content.
    content = df.loc[:, use_col] if use_col else df.iloc[:, 0]

    for column_name, metric in metrics.items():
        df[column_name] = content.map(metric)
    return df
def download_button(object_to_download, download_filename, button_text, pickle_it=False):
    """
    Generates a styled link ("button") to download the given object_to_download.

    Params:
    ------
    object_to_download:  The object to be downloaded. ``bytes`` are used as-is,
        a ``pd.DataFrame`` is converted to CSV without its index, and anything
        else is JSON-encoded (so a ``str`` is downloaded as a quoted JSON string).
    download_filename (str): filename and extension of file. e.g. mydata.csv,
        some_txt_output.txt. It is HTML-escaped before being placed in the
        ``download`` attribute.
    button_text (str): Text to display on download button
        (e.g. 'click here to download file'). Inserted into the markup as-is.
    pickle_it (bool): If True, pickle the object instead of applying the
        conversions described above.

    Returns:
    -------
    (str): a <style> block followed by the anchor tag to download
        object_to_download, or None if pickling was requested and raised
        ``pickle.PicklingError`` (the error is shown with ``st.write``).

    Examples:
    --------
    download_button(your_df, 'YOUR_DF.csv', 'Click to download data!')
    download_button(your_str, 'YOUR_STRING.txt', 'Click to download text!')
    """
    if pickle_it:
        try:
            object_to_download = pickle.dumps(object_to_download)
        except pickle.PicklingError as e:
            st.write(e)
            return None
    elif isinstance(object_to_download, pd.DataFrame):
        object_to_download = object_to_download.to_csv(index=False)
    elif not isinstance(object_to_download, bytes):
        # Try JSON encode for everything else
        object_to_download = json.dumps(object_to_download)

    # b64encode needs bytes; the CSV and JSON conversions above produce str.
    if isinstance(object_to_download, str):
        object_to_download = object_to_download.encode()
    b64 = base64.b64encode(object_to_download).decode()

    # Digits are stripped from the random hex so the id never starts with one.
    # The fixed letter prefix keeps the id non-empty even when the hex is
    # (almost) all digits, which would otherwise yield an invalid CSS selector.
    button_id = 'dl' + re.sub(r'\d+', '', uuid.uuid4().hex)

    custom_css = f"""
<style>
#{button_id} {{
background-color: rgb(255, 255, 255);
color: rgb(38, 39, 48);
padding: 0.25em 0.38em;
position: relative;
text-decoration: none;
border-radius: 4px;
border-width: 1px;
border-style: solid;
border-color: rgb(230, 234, 241);
border-image: initial;
}}
#{button_id}:hover {{
border-color: rgb(246, 51, 102);
color: rgb(246, 51, 102);
}}
#{button_id}:active {{
box-shadow: none;
background-color: rgb(246, 51, 102);
color: white;
}}
</style> """

    # The filename may be built from user data (e.g. an uploaded column name),
    # so escape it to keep quotes/angle brackets from breaking out of the attribute.
    safe_filename = html.escape(str(download_filename), quote=True)
    dl_link = custom_css + f'<a download="{safe_filename}" id="{button_id}" href="data:file/txt;base64,{b64}">{button_text}</a><br></br>'
    return dl_link
# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment