chad-m/streamlit_readability_checker.py

## streamlit_readability_checker.py
import base64
import uuid
import re

import streamlit as st
import pandas as pd
import textstat


# st.set_option('deprecation.showfileUploaderEncoding', False)
# Reference: https://github.com/shivam5992/textstat

def main():
    """Draw the sidebar menu and run the input mode the user selected.

    The sidebar receives a title, a 'Main Menu' select box and an 'About'
    checkbox; ticking the checkbox adds a short description of the app.
    """
    sidebar = st.sidebar
    divider = '-' * 17

    # Sidebar header and mode picker
    sidebar.markdown('# Content Readability Checker :orange_book: :pencil2:\n' + divider)
    mode = sidebar.selectbox('Main Menu', ['Input Text', 'Upload File (CSV)'])
    sidebar.markdown(divider)

    # Main area: hand over to the selected mode
    if mode == 'Upload File (CSV)':
        load_upload_mode()
        # The upload mode puts its own widgets in the sidebar; close them off.
        sidebar.markdown(divider)
    elif mode == 'Input Text':
        load_input_text_mode()
    else:
        st.write(':sunglasses:')

    about_requested = sidebar.checkbox('About')
    if about_requested:
        sidebar.markdown("""
            This app returns a set of readability scores for given content items, and might be useful
            to content creators, marketing teams, among others.

            The set of extracted scores includes: `syllable count`, `word count`, `sentence count`,
            ` Flesch Reading Ease formula`, `Flesch-Kincaid Grade Level`, `The Fog Scale (Gunning FOG Formula)`,
            `The SMOG Index`, `Automated Readability Index`, `The Coleman-Liau Index`, `Linsear Write Formula`,
            `Dale-Chall Readability Score`, and a `Readability Consensus` based upon all the above tests.""")


def load_input_text_mode():
    """Show a text area (pre-filled with a sample sentence) and write its scores."""
    default_text = "It's time to book your mammogram appointment"
    entered_text = st.text_area('Enter Text', default_text, height=225)
    # Score whatever is currently in the text area and display the result.
    st.write(content_item_readability(entered_text))


def load_upload_mode():
    """Score one column of an uploaded CSV and offer the results as a download.

    Nothing is rendered in the main area until a file has been uploaded
    through the sidebar.
    """
    uploaded_file = st.sidebar.file_uploader("Load Data (Single Column CSV)", type="csv")
    if not uploaded_file:
        return

    content_df = pd.read_csv(uploaded_file)
    column_names = list(content_df.columns)
    use_col = st.sidebar.selectbox('Choose column name to evaluate', column_names)

    scored_df = content_items_readability(content_df, use_col=use_col)
    st.write(scored_df)

    # The download "button" is raw HTML, hence unsafe_allow_html below.
    file_name = f'Content Items - {use_col} - Readability Results.csv'
    st.markdown(download_button(scored_df, file_name, 'Download'), unsafe_allow_html=True)


def content_item_readability(s):
    """Return a dict of textstat readability scores and counts for one string.

    Raises NotImplementedError when `s` is not a str.
    """
    # Guard clause: only plain strings are supported.
    if not isinstance(s, str):
        raise NotImplementedError

    return {
        # Readability
        'flesch_reading_ease': textstat.flesch_reading_ease(s),
        'smog_index': textstat.smog_index(s),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(s),
        'coleman_liau_index': textstat.coleman_liau_index(s),
        'automated_readability_index': textstat.automated_readability_index(s),
        'dale_chall_readability_score': textstat.dale_chall_readability_score(s),
        'difficult_words': textstat.difficult_words(s),
        'linsear_write_formula': textstat.linsear_write_formula(s),
        'gunning_fog': textstat.gunning_fog(s),
        'text_standard': textstat.text_standard(s),
        # Additionals
        'syllable_count': textstat.syllable_count(s),
        'word_count': textstat.lexicon_count(s),
        'sentence_count': textstat.sentence_count(s),
    }


def content_items_readability(df, use_col=None):
    """Append one textstat score column per metric to `df` and return it.

    Params:
    ------
    df: DataFrame holding the content items to be evaluated. It is modified
        in place: one new column is assigned per metric.
    use_col: label of the column to score. When falsy (the default), the
        first column by position is scored.

    Returns:
    -------
    The same `df` object, with the score columns added.
    """
    # (output column, scorer) pairs, in the order the columns are added.
    metrics = (
        ('flesch_reading_ease', textstat.flesch_reading_ease),
        ('smog_index', textstat.smog_index),
        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
        ('coleman_liau_index', textstat.coleman_liau_index),
        ('automated_readability_index', textstat.automated_readability_index),
        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
        ('difficult_words', textstat.difficult_words),
        ('linsear_write_formula', textstat.linsear_write_formula),
        ('gunning_fog', textstat.gunning_fog),
        ('text_standard', textstat.text_standard),
        ('syllable_count', textstat.syllable_count),
        ('word_count', textstat.lexicon_count),
        # Was 'sentence_count)' (stray parenthesis); now matches the key
        # used by content_item_readability.
        ('sentence_count', textstat.sentence_count),
    )

    # Select the text column once, before any score column is assigned, so
    # every metric is computed from the same source values.
    texts = df.loc[:, use_col] if use_col else df.iloc[:, 0]

    for column, scorer in metrics:
        df[column] = texts.map(scorer)

    return df


def download_button(object_to_download, download_filename, button_text, pickle_it=False):
    """
    Generates an HTML download link, styled as a button, for the given object.

    Params:
    ------
    object_to_download:  The object to be downloaded. bytes and str are used
        as-is, a pandas DataFrame is written as CSV (without the index), and
        anything else is JSON-encoded.
    download_filename (str): filename and extension of file. e.g. mydata.csv,
        some_txt_output.txt. It is HTML-escaped before being placed in the link.
    button_text (str): Text to display on download button (e.g. 'click here to download file')
    pickle_it (bool): If True, pickle the object instead of applying the
        conversions described above.

    Returns:
    -------
    (str): a <style> block followed by the anchor tag to download
        object_to_download, or None if pickling raised PicklingError (the
        error is shown with st.write).

    Examples:
    --------
    download_button(your_df, 'YOUR_DF.csv', 'Click to download data!')
    download_button(your_str, 'YOUR_STRING.txt', 'Click to download text!')

    """
    # Imported here because the module's top-level imports do not provide
    # them; without these the pickle and JSON paths raised NameError.
    import html
    import json
    import pickle

    if pickle_it:
        try:
            payload = pickle.dumps(object_to_download)
        except pickle.PicklingError as e:
            st.write(e)
            return None
    elif isinstance(object_to_download, (bytes, str)):
        # Raw bytes and text are downloaded unchanged.
        payload = object_to_download
    elif isinstance(object_to_download, pd.DataFrame):
        payload = object_to_download.to_csv(index=False)
    else:
        # Try JSON encode for everything else
        payload = json.dumps(object_to_download)

    # base64 works on bytes; text payloads are encoded first.
    if isinstance(payload, str):
        payload = payload.encode()
    b64 = base64.b64encode(payload).decode()

    # Digits are stripped from the random hex string, leaving only letters
    # for the element id used in the CSS selectors below.
    button_id = re.sub(r'\d+', '', uuid.uuid4().hex)

    custom_css = f"""
        <style>
            #{button_id} {{
                background-color: rgb(255, 255, 255);
                color: rgb(38, 39, 48);
                padding: 0.25em 0.38em;
                position: relative;
                text-decoration: none;
                border-radius: 4px;
                border-width: 1px;
                border-style: solid;
                border-color: rgb(230, 234, 241);
                border-image: initial;

            }}
            #{button_id}:hover {{
                border-color: rgb(246, 51, 102);
                color: rgb(246, 51, 102);
            }}
            #{button_id}:active {{
                box-shadow: none;
                background-color: rgb(246, 51, 102);
                color: white;
                }}
        </style> """

    # The filename can contain user-supplied text (e.g. a CSV column name),
    # so escape it before placing it inside the quoted attribute.
    # button_text is inserted as-is.
    safe_filename = html.escape(download_filename, quote=True)
    dl_link = custom_css + f'<a download="{safe_filename}" id="{button_id}" href="data:file/txt;base64,{b64}">{button_text}</a><br></br>'

    return dl_link


# Entry-point guard: main() is called only when __name__ is '__main__'.
if __name__ == '__main__':
    main()
	import base64
	import uuid
	import re

	import streamlit as st
	import pandas as pd
	import textstat


	# st.set_option('deprecation.showfileUploaderEncoding', False)
	# Reference: https://github.com/shivam5992/textstat

	def main():
	    """Draw the sidebar menu and run the input mode the user selected.

	    The sidebar receives a title, a 'Main Menu' select box and an 'About'
	    checkbox; ticking the checkbox adds a short description of the app.
	    """
	    # Sidebar
	    st.sidebar.markdown('# Content Readability Checker :orange_book: :pencil2:\n' + '-' * 17)
	    mode = st.sidebar.selectbox('Main Menu', ['Input Text', 'Upload File (CSV)'])
	    st.sidebar.markdown('-' * 17)

	    # Main
	    if mode == 'Input Text':
	        load_input_text_mode()

	    elif mode == 'Upload File (CSV)':
	        load_upload_mode()
	        st.sidebar.markdown('-' * 17)

	    else:
	        st.write(':sunglasses:')

	    if st.sidebar.checkbox('About'):
	        # The text lines below are kept exactly as they appear in the file.
	        st.sidebar.markdown("""
	This app returns a set of readability scores for given content items, and might be useful
	to content creators, marketing teams, among others.

	The set of extracted scores includes: `syllable count`, `word count`, `sentence count`,
	` Flesch Reading Ease formula`, `Flesch-Kincaid Grade Level`, `The Fog Scale (Gunning FOG Formula)`,
	`The SMOG Index`, `Automated Readability Index`, `The Coleman-Liau Index`, `Linsear Write Formula`,
	`Dale-Chall Readability Score`, and a `Readability Consensus` based upon all the above tests.""")


	def load_input_text_mode():
	    """Show a text area (pre-filled with a sample sentence) and write its scores."""
	    test_data = "It's time to book your mammogram appointment"
	    s = st.text_area('Enter Text', test_data, height=225)
	    # Score whatever is currently in the text area and display the result.
	    results = content_item_readability(s)
	    st.write(results)


	def load_upload_mode():
	    """Score one column of an uploaded CSV and offer the results as a download.

	    Nothing is rendered in the main area until a file has been uploaded
	    through the sidebar.
	    """
	    uploaded_file = st.sidebar.file_uploader("Load Data (Single Column CSV)", type="csv")
	    if uploaded_file:
	        uploaded_content_df = pd.read_csv(uploaded_file)
	        use_col = st.sidebar.selectbox('Choose column name to evaluate', list(uploaded_content_df.columns))
	        results = content_items_readability(uploaded_content_df, use_col=use_col)
	        st.write(results)
	        # The download "button" is raw HTML, hence unsafe_allow_html below.
	        download_btn_str = download_button(results, f'Content Items - {use_col} - Readability Results.csv', 'Download')
	        st.markdown(download_btn_str, unsafe_allow_html=True)


	def content_item_readability(s):
	    """Return a dict of textstat readability scores and counts for one string.

	    Raises NotImplementedError when `s` is not a str.
	    """
	    if isinstance(s, str):

	        results = dict(
	            # Readability
	            flesch_reading_ease=textstat.flesch_reading_ease(s),
	            smog_index=textstat.smog_index(s),
	            flesch_kincaid_grade=textstat.flesch_kincaid_grade(s),
	            coleman_liau_index=textstat.coleman_liau_index(s),
	            automated_readability_index=textstat.automated_readability_index(s),
	            dale_chall_readability_score=textstat.dale_chall_readability_score(s),
	            difficult_words=textstat.difficult_words(s),
	            linsear_write_formula=textstat.linsear_write_formula(s),
	            gunning_fog=textstat.gunning_fog(s),
	            text_standard=textstat.text_standard(s),
	            # Additionals
	            syllable_count=textstat.syllable_count(s),
	            word_count=textstat.lexicon_count(s),
	            sentence_count=textstat.sentence_count(s))

	        return results

	    else:
	        # Only plain strings are supported.
	        raise NotImplementedError


	def content_items_readability(df, use_col=None):
	    """Append one textstat score column per metric to `df` and return it.

	    Params:
	    ------
	    df: DataFrame holding the content items to be evaluated. It is modified
	        in place: one new column is assigned per metric.
	    use_col: label of the column to score. When falsy (the default), the
	        first column by position is scored.

	    Returns:
	    -------
	    The same `df` object, with the score columns added.
	    """
	    # (output column, scorer) pairs, in the order the columns are added.
	    metrics = (
	        ('flesch_reading_ease', textstat.flesch_reading_ease),
	        ('smog_index', textstat.smog_index),
	        ('flesch_kincaid_grade', textstat.flesch_kincaid_grade),
	        ('coleman_liau_index', textstat.coleman_liau_index),
	        ('automated_readability_index', textstat.automated_readability_index),
	        ('dale_chall_readability_score', textstat.dale_chall_readability_score),
	        ('difficult_words', textstat.difficult_words),
	        ('linsear_write_formula', textstat.linsear_write_formula),
	        ('gunning_fog', textstat.gunning_fog),
	        ('text_standard', textstat.text_standard),
	        ('syllable_count', textstat.syllable_count),
	        ('word_count', textstat.lexicon_count),
	        # Was 'sentence_count)' (stray parenthesis); now matches the key
	        # used by content_item_readability.
	        ('sentence_count', textstat.sentence_count),
	    )

	    # Select the text column once, before any score column is assigned, so
	    # every metric is computed from the same source values.
	    texts = df.loc[:, use_col] if use_col else df.iloc[:, 0]

	    for column, scorer in metrics:
	        df[column] = texts.map(scorer)

	    return df


	def download_button(object_to_download, download_filename, button_text, pickle_it=False):
	    """
	    Generates an HTML download link, styled as a button, for the given object.

	    Params:
	    ------
	    object_to_download: The object to be downloaded. bytes and str are used
	        as-is, a pandas DataFrame is written as CSV (without the index), and
	        anything else is JSON-encoded.
	    download_filename (str): filename and extension of file. e.g. mydata.csv,
	        some_txt_output.txt. It is HTML-escaped before being placed in the link.
	    button_text (str): Text to display on download button (e.g. 'click here to download file')
	    pickle_it (bool): If True, pickle the object instead of applying the
	        conversions described above.

	    Returns:
	    -------
	    (str): a <style> block followed by the anchor tag to download
	        object_to_download, or None if pickling raised PicklingError (the
	        error is shown with st.write).

	    Examples:
	    --------
	    download_button(your_df, 'YOUR_DF.csv', 'Click to download data!')
	    download_button(your_str, 'YOUR_STRING.txt', 'Click to download text!')

	    """
	    # Imported here because the file's import lines do not provide them;
	    # without these the pickle and JSON paths raised NameError.
	    import html
	    import json
	    import pickle

	    if pickle_it:
	        try:
	            payload = pickle.dumps(object_to_download)
	        except pickle.PicklingError as e:
	            st.write(e)
	            return None
	    elif isinstance(object_to_download, (bytes, str)):
	        # Raw bytes and text are downloaded unchanged.
	        payload = object_to_download
	    elif isinstance(object_to_download, pd.DataFrame):
	        payload = object_to_download.to_csv(index=False)
	    else:
	        # Try JSON encode for everything else
	        payload = json.dumps(object_to_download)

	    # base64 works on bytes; text payloads are encoded first.
	    if isinstance(payload, str):
	        payload = payload.encode()
	    b64 = base64.b64encode(payload).decode()

	    # Digits are stripped from the random hex string, leaving only letters
	    # for the element id used in the CSS selectors below.
	    button_id = re.sub(r'\d+', '', uuid.uuid4().hex)

	    # The CSS text lines are kept exactly as they appear in the file.
	    custom_css = f"""
	<style>
	#{button_id} {{
	background-color: rgb(255, 255, 255);
	color: rgb(38, 39, 48);
	padding: 0.25em 0.38em;
	position: relative;
	text-decoration: none;
	border-radius: 4px;
	border-width: 1px;
	border-style: solid;
	border-color: rgb(230, 234, 241);
	border-image: initial;

	}}
	#{button_id}:hover {{
	border-color: rgb(246, 51, 102);
	color: rgb(246, 51, 102);
	}}
	#{button_id}:active {{
	box-shadow: none;
	background-color: rgb(246, 51, 102);
	color: white;
	}}
	</style> """

	    # The filename can contain user-supplied text (e.g. a CSV column name),
	    # so escape it before placing it inside the quoted attribute.
	    # button_text is inserted as-is.
	    safe_filename = html.escape(download_filename, quote=True)
	    dl_link = custom_css + f'<a download="{safe_filename}" id="{button_id}" href="data:file/txt;base64,{b64}">{button_text}</a><br></br>'

	    return dl_link


	# Entry-point guard: main() is called only when __name__ is '__main__'.
	if __name__ == '__main__':
	    main()