Created
July 31, 2020 22:29
-
-
Save chad-m/7b36d62f2d01291ce37effd05d6af65e to your computer and use it in GitHub Desktop.
A Streamlit app to check readability of given content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
import uuid | |
import re | |
import streamlit as st | |
import pandas as pd | |
import textstat | |
# st.set_option('deprecation.showfileUploaderEncoding', False) | |
# Reference: https://github.com/shivam5992/textstat | |
def main():
    """Build the sidebar and render the page for the selected mode.

    The sidebar holds the app title, the mode selector and an optional
    'About' blurb. The main area is filled by the handler of whichever
    mode is selected; the upload mode additionally appends a divider to
    the sidebar once its own widgets have been drawn.
    """
    divider = '-' * 17
    text_mode = 'Input Text'
    upload_mode = 'Upload File (CSV)'
    sidebar = st.sidebar

    # Sidebar header and mode selector.
    sidebar.markdown('# Content Readability Checker :orange_book: :pencil2:\n' + divider)
    mode = sidebar.selectbox('Main Menu', [text_mode, upload_mode])
    sidebar.markdown(divider)

    # Main area: hand over to the handler of the selected mode.
    if mode == text_mode:
        load_input_text_mode()
    elif mode == upload_mode:
        load_upload_mode()
        sidebar.markdown(divider)
    else:
        st.write(':sunglasses:')

    # The description is only shown while the 'About' box is ticked.
    if not sidebar.checkbox('About'):
        return
    sidebar.markdown("""
This app returns a set of readability scores for given content items, and might be useful
to content creators, marketing teams, among others.
The set of extracted scores includes: `syllable count`, `word count`, `sentence count`,
` Flesch Reading Ease formula`, `Flesch-Kincaid Grade Level`, `The Fog Scale (Gunning FOG Formula)`,
`The SMOG Index`, `Automated Readability Index`, `The Coleman-Liau Index`, `Linsear Write Formula`,
`Dale-Chall Readability Score`, and a `Readability Consensus` based upon all the above tests.""")
def load_input_text_mode():
    """Show a text area and the readability scores of its current content.

    The text area is pre-filled with a short sample sentence, so scores
    are displayed as soon as the page loads.
    """
    sample_text = "It's time to book your mammogram appointment"
    entered_text = st.text_area('Enter Text', sample_text, height=225)
    st.write(content_item_readability(entered_text))
def load_upload_mode():
    """Score one column of an uploaded CSV file and offer the result for download.

    Nothing is rendered in the main area until a file has been uploaded.
    After upload, the user picks the column to evaluate in the sidebar;
    the scored table is displayed and a download button for it is added.
    """
    uploaded_file = st.sidebar.file_uploader("Load Data (Single Column CSV)", type="csv")
    if not uploaded_file:
        return

    content_df = pd.read_csv(uploaded_file)
    column_names = list(content_df.columns)
    use_col = st.sidebar.selectbox('Choose column name to evaluate', column_names)

    scored_df = content_items_readability(content_df, use_col=use_col)
    st.write(scored_df)

    csv_name = f'Content Items - {use_col} - Readability Results.csv'
    button_html = download_button(scored_df, csv_name, 'Download')
    # The button is raw HTML/CSS, so Streamlit must be told to render it.
    st.markdown(button_html, unsafe_allow_html=True)
def content_item_readability(s):
    """Compute the textstat readability scores for a single piece of text.

    Params:
    ------
    s (str): the text to evaluate.

    Returns:
    -------
    (dict): maps each score name to the value returned by the matching
        textstat function, readability formulas first, followed by the
        syllable, word and sentence counts.

    Raises:
    ------
    NotImplementedError: if `s` is not a string.
    """
    if not isinstance(s, str):
        raise NotImplementedError

    return {
        # Readability
        'flesch_reading_ease': textstat.flesch_reading_ease(s),
        'smog_index': textstat.smog_index(s),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(s),
        'coleman_liau_index': textstat.coleman_liau_index(s),
        'automated_readability_index': textstat.automated_readability_index(s),
        'dale_chall_readability_score': textstat.dale_chall_readability_score(s),
        'difficult_words': textstat.difficult_words(s),
        'linsear_write_formula': textstat.linsear_write_formula(s),
        'gunning_fog': textstat.gunning_fog(s),
        'text_standard': textstat.text_standard(s),
        # Additionals
        'syllable_count': textstat.syllable_count(s),
        'word_count': textstat.lexicon_count(s),
        'sentence_count': textstat.sentence_count(s),
    }
def content_items_readability(df, use_col=None):
    """Add one readability-score column per textstat metric to a dataframe.

    Params:
    ------
    df (pd.DataFrame): dataframe holding the content items to evaluate.
        It is modified in place: one column is added (or overwritten)
        per metric.
    use_col: label of the column that holds the text. When falsy
        (the default), the first column of `df` is used.

    Returns:
    -------
    (pd.DataFrame): the same `df` object, with the score columns added.
    """
    # Output column name -> textstat function, in output column order.
    metrics = {
        'flesch_reading_ease': textstat.flesch_reading_ease,
        'smog_index': textstat.smog_index,
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade,
        'coleman_liau_index': textstat.coleman_liau_index,
        'automated_readability_index': textstat.automated_readability_index,
        'dale_chall_readability_score': textstat.dale_chall_readability_score,
        'difficult_words': textstat.difficult_words,
        'linsear_write_formula': textstat.linsear_write_formula,
        'gunning_fog': textstat.gunning_fog,
        'text_standard': textstat.text_standard,
        'syllable_count': textstat.syllable_count,
        'word_count': textstat.lexicon_count,
        # Previously written as 'sentence_count)' (stray parenthesis).
        'sentence_count': textstat.sentence_count,
    }

    # Resolve the text column once, before any score column is written, so
    # that an input column sharing a name with a metric is still scored
    # from its original text rather than from freshly written scores.
    texts = df.loc[:, use_col] if use_col else df.iloc[:, 0]

    for column_name, score in metrics.items():
        df[column_name] = texts.map(score)
    return df
def download_button(object_to_download, download_filename, button_text, pickle_it=False):
    """
    Generates a styled HTML download button (an anchor tag with an embedded
    base64 data URI) for the given object_to_download.

    Params:
    ------
    object_to_download: The object to be downloaded. `bytes` are embedded
        as-is, a DataFrame is converted to CSV (without the index), and
        anything else is JSON-encoded (note: this includes plain strings,
        which therefore end up quoted).
    download_filename (str): filename and extension of file. e.g. mydata.csv,
        some_txt_output.txt
    button_text (str): Text to display on download button (e.g. 'click here to download file')
    pickle_it (bool): If True, pickle the object instead of applying the
        conversions above.

    Returns:
    -------
    (str): the CSS and anchor tag to download object_to_download, or None
        when pickling was requested and failed (the error is written to
        the Streamlit page).

    Examples:
    --------
    download_button(your_df, 'YOUR_DF.csv', 'Click to download data!')
    download_button(your_dict, 'YOUR_DICT.json', 'Click to download JSON!')
    """
    # Imported here because the module's import section does not provide them.
    import html
    import json
    import pickle

    if pickle_it:
        try:
            object_to_download = pickle.dumps(object_to_download)
        except (pickle.PicklingError, TypeError, AttributeError) as e:
            # Unpicklable objects surface as any of these three in practice.
            st.write(e)
            return None
    elif isinstance(object_to_download, bytes):
        pass
    elif isinstance(object_to_download, pd.DataFrame):
        object_to_download = object_to_download.to_csv(index=False)
    else:
        # Try JSON encode for everything else
        object_to_download = json.dumps(object_to_download)

    # base64 works on bytes; text produced above has to be encoded first.
    if isinstance(object_to_download, str):
        payload = object_to_download.encode()
    else:
        payload = object_to_download
    b64 = base64.b64encode(payload).decode()

    # Unique element id so several buttons on one page do not share styles.
    # The fixed prefix keeps it a valid CSS identifier (must not start with
    # a digit) without discarding any of the UUID's randomness.
    button_id = f'btn_{uuid.uuid4().hex}'

    # Both values may come from user data (e.g. a CSV column name) and the
    # result is rendered as raw HTML, so they must be escaped.
    safe_filename = html.escape(str(download_filename), quote=True)
    safe_text = html.escape(str(button_text))

    custom_css = f"""
        <style>
            #{button_id} {{
                background-color: rgb(255, 255, 255);
                color: rgb(38, 39, 48);
                padding: 0.25em 0.38em;
                position: relative;
                text-decoration: none;
                border-radius: 4px;
                border-width: 1px;
                border-style: solid;
                border-color: rgb(230, 234, 241);
                border-image: initial;
            }}
            #{button_id}:hover {{
                border-color: rgb(246, 51, 102);
                color: rgb(246, 51, 102);
            }}
            #{button_id}:active {{
                box-shadow: none;
                background-color: rgb(246, 51, 102);
                color: white;
            }}
        </style> """

    dl_link = custom_css + (
        f'<a download="{safe_filename}" id="{button_id}" '
        f'href="data:file/txt;base64,{b64}">{safe_text}</a><br></br>'
    )
    return dl_link
# Only start the app when this file is executed as a script
# (e.g. via `streamlit run`), not when it is imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment