Skip to content

Instantly share code, notes, and snippets.

Last active September 21, 2023 17:14
Show Gist options
  • Save ines/b320cb8441b590eedf19137599ce6685 to your computer and use it in GitHub Desktop.
Save ines/b320cb8441b590eedf19137599ce6685 to your computer and use it in GitHub Desktop.
Streamlit + spaCy
pip install streamlit
pip install spacy
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm
import streamlit as st
import spacy
from spacy import displacy
import pandas as pd
SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
def load_model(name):
return spacy.load(name)
def process_text(model_name, text):
nlp = load_model(model_name)
return nlp(text)
st.sidebar.title("Interactive spaCy visualizer")
Process text with [spaCy]( models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy]( visualizer under the hood.
spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state ="Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)
if "parser" in nlp.pipe_names:
st.header("Dependency Parse & Part-of-speech tags")
st.sidebar.header("Dependency Parse")
split_sents = st.sidebar.checkbox("Split sentences", value=True)
collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
collapse_phrases = st.sidebar.checkbox("Collapse phrases")
compact = st.sidebar.checkbox("Compact mode")
options = {
"collapse_punct": collapse_punct,
"collapse_phrases": collapse_phrases,
"compact": compact,
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
for sent in docs:
html = displacy.render(sent, options=options)
# Double newlines seem to mess with the rendering
html = html.replace("\n\n", "\n")
if split_sents and len(docs) > 1:
st.markdown(f"> {sent.text}")
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
if "ner" in nlp.pipe_names:
st.header("Named Entities")
st.sidebar.header("Named Entities")
default_labels = ["PERSON", "ORG", "GPE", "LOC"]
labels = st.sidebar.multiselect(
"Entity labels", nlp.get_pipe("ner").labels, default_labels
html = displacy.render(doc, style="ent", options={"ents": labels})
# Newlines seem to mess with the rendering
html = html.replace("\n", " ")
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
if "entity_linker" in nlp.pipe_names:
data = [
[str(getattr(ent, attr)) for attr in attrs]
for ent in doc.ents
if ent.label_ in labels
df = pd.DataFrame(data, columns=attrs)
if "textcat" in nlp.pipe_names:
st.header("Text Classification")
st.markdown(f"> {text}")
df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
vector_size = nlp.meta.get("vectors", {}).get("width", 0)
if vector_size:
st.header("Vectors & Similarity")
text1 = st.text_input("Text or word 1", "apple")
text2 = st.text_input("Text or word 2", "orange")
doc1 = process_text(spacy_model, text1)
doc2 = process_text(spacy_model, text2)
similarity = doc1.similarity(doc2)
if similarity > 0.5:
st.header("Token attributes")
if st.button("Show token attributes"):
attrs = [
data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
df = pd.DataFrame(data, columns=attrs)
st.header("JSON Doc")
if st.button("Show JSON Doc"):
st.header("JSON model meta")
if st.button("Show JSON model meta"):
Copy link

Nice example!

The ignore_hash argument has been renamed to allow_output_mutation here. This should be changed in lines 12 and 17.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment