dlukes/app.py

## app.py
from collections import Counter

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import regex as re
from shiny import App, reactive, render, ui

# Global matplotlib defaults for every figure created by this app:
# draw grid lines along both axes ...
mpl.rcParams["axes.grid"] = True
mpl.rcParams["axes.grid.axis"] = "both"
# ... and hide all four axes spines.
mpl.rcParams["axes.spines.left"] = False
mpl.rcParams["axes.spines.right"] = False
mpl.rcParams["axes.spines.top"] = False
mpl.rcParams["axes.spines.bottom"] = False

# Sidebar: the text to analyze, the words to look for, and the case toggle.
_sidebar_panel = ui.panel_sidebar(
    ui.input_text_area("text", "Text to analyze"),
    ui.input_text("words", "Space-separated words to plot"),
    ui.input_checkbox("icase", "Ignore case"),
)

# Main area: the dispersion plot with the frequency table underneath.
_main_panel = ui.panel_main(
    ui.output_plot("dispersion_plot"),
    ui.output_table("freq_dist"),
)

# Page layout: a title above the sidebar/main split.
app_ui = ui.page_fluid(
    ui.panel_title("Lexical dispersion plot"),
    ui.layout_sidebar(_sidebar_panel, _main_panel),
)


def tokenize(text):
    """Split *text* into a list of tokens, dropping whitespace.

    Two kinds of tokens are produced:

    * a run that starts and ends with alphabetic characters and may contain
      any non-whitespace characters in between (e.g. ``don't``);
    * a run of non-whitespace characters that are not alphabetic
      (e.g. punctuation or digits).

    Returns:
        The matched tokens, in order of appearance.
    """
    # VERSION1 behaviour is needed for the ``&&`` set intersection in the
    # second alternative.
    token_pattern = re.compile(
        r"\p{Alphabetic}+(?:\S+\p{Alphabetic}+)*|[\S&&\P{Alphabetic}]+",
        flags=re.VERSION1,
    )
    return token_pattern.findall(text)


def server(input, output, session):
    """Define the reactive computations and outputs of the app.

    Reads the ``text``, ``words`` and ``icase`` inputs and registers the
    ``dispersion_plot`` and ``freq_dist`` outputs.

    Args:
        input: Shiny input accessor.
        output: Shiny output registry used by the ``@output`` decorators.
        session: Shiny session object (not used directly here).
    """

    @reactive.Calc
    def tokenized_text():
        """Return the tokens of the input text.

        The text is lowercased first when "Ignore case" is checked. If the
        text is blank, a warning notification is shown and ``[]`` is returned.
        """
        text = input.text().strip()
        if not text:
            ui.notification_show("Please provide an input text.", type="warning")
            return []
        if input.icase():
            text = text.lower()
        return tokenize(text)

    @reactive.Calc
    def split_words():
        """Map each distinct requested word to its row on the plot's y axis.

        Rows are numbered ``0 .. n-1`` in reverse order of first appearance,
        so the first word typed gets the highest row. The words are
        lowercased first when "Ignore case" is checked. If no words were
        given, a warning notification is shown and ``{}`` is returned.
        """
        words = input.words().strip()
        if not words:
            ui.notification_show("Please provide words to plot.", type="warning")
            return {}
        if input.icase():
            words = words.lower()
        # Deduplicate *before* numbering (keeping first-occurrence order).
        # Numbering the raw list would let a repeated word overwrite its own
        # entry with a later index, leaving gaps so that row indices no longer
        # match the ``range(len(words))`` tick positions used by the plot.
        unique_words = list(dict.fromkeys(words.split()))
        return {w: i for (i, w) in enumerate(reversed(unique_words))}

    @output
    @render.plot(alt="Dispersion plot of chosen words in input text")
    def dispersion_plot():
        """Plot the token offsets at which each requested word occurs.

        Returns nothing when either the text or the word list is empty.
        """
        text = tokenized_text()
        words = split_words()
        if not (text and words):
            return

        # x: token offset of each hit; y: row assigned to the matched word.
        xs = []
        ys = []
        for i, tok in enumerate(text):
            if tok in words:
                xs.append(i)
                ys.append(words[tok])

        fig, ax = plt.subplots()
        ax.plot(xs, ys, marker="|", markersize=20, linestyle="")
        # ``words`` iterates in row order 0..n-1, so the labels line up with
        # the tick positions.
        ax.set_yticks(range(len(words)), labels=list(words))
        ax.tick_params(axis="both", length=0)
        ax.set_xlabel("Word offset")
        return fig

    @output
    @render.table()
    def freq_dist():
        """Tabulate how often each requested word occurs in the text.

        Only words that occur at least once are listed, most frequent first.
        Returns nothing when either the text or the word list is empty.
        """
        text = tokenized_text()
        words = set(split_words())
        if not (text and words):
            return

        counts = Counter(t for t in text if t in words)
        df = pd.DataFrame(counts.most_common(), columns=["word", "frequency"])
        return df


# Application object combining the UI definition with the server function.
app = App(app_ui, server)

## requirements.txt
Jinja2
	from collections import Counter

	import matplotlib as mpl
	import matplotlib.pyplot as plt
	import pandas as pd
	import regex as re
	from shiny import App, reactive, render, ui

	mpl.rcParams.update(
	{
	"axes.grid": True,
	"axes.grid.axis": "both",
	"axes.spines.left": False,
	"axes.spines.right": False,
	"axes.spines.top": False,
	"axes.spines.bottom": False,
	}
	)

	app_ui = ui.page_fluid(
	ui.panel_title("Lexical dispersion plot"),
	ui.layout_sidebar(
	ui.panel_sidebar(
	ui.input_text_area("text", "Text to analyze"),
	ui.input_text("words", "Space-separated words to plot"),
	ui.input_checkbox("icase", "Ignore case"),
	),
	ui.panel_main(
	ui.output_plot("dispersion_plot"),
	ui.output_table("freq_dist"),
	),
	),
	)


	def tokenize(text):
	return re.findall(
	r"\p{Alphabetic}+(?:\S+\p{Alphabetic}+)*\|[\S&&\P{Alphabetic}]+",
	text,
	flags=re.VERSION1,
	)


	def server(input, output, session):
	@reactive.Calc
	def tokenized_text():
	text = input.text().strip()
	if not text:
	ui.notification_show("Please provide an input text.", type="warning")
	return []
	if input.icase():
	text = text.lower()
	return tokenize(text)

	@reactive.Calc
	def split_words():
	words = input.words().strip()
	if not words:
	ui.notification_show("Please provide words to plot.", type="warning")
	return {}
	if input.icase():
	words = words.lower()
	return {w: i for (i, w) in enumerate(reversed(words.split()))}

	@output
	@render.plot(alt="Dispersion plot of chosen words in input text")
	def dispersion_plot():
	text = tokenized_text()
	words = split_words()
	if not (text and words):
	return

	xs = []
	ys = []
	for i, tok in enumerate(text):
	if tok in words:
	xs.append(i)
	ys.append(words[tok])

	fig, ax = plt.subplots()
	ax.plot(xs, ys, marker="\|", markersize=20, linestyle="")
	ax.set_yticks(range(len(words)), labels=list(words))
	ax.tick_params(axis="both", length=0)
	ax.set_xlabel("Word offset")
	return fig

	@output
	@render.table()
	def freq_dist():
	text = tokenized_text()
	words = set(split_words())
	if not (text and words):
	return

	freq_dist = Counter(t for t in text if t in words)
	df = pd.DataFrame(freq_dist.most_common(), columns=["word", "frequency"])
	return df


	app = App(app_ui, server)