Skip to content

Instantly share code, notes, and snippets.

@ines ines/Install

Last active Jul 29, 2020
Embed
What would you like to do?
Streamlit + spaCy
pip install streamlit
pip install spacy
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm
import streamlit as st
import spacy
from spacy import displacy
import pandas as pd
SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
@st.cache(ignore_hash=True)
def load_model(name):
return spacy.load(name)
@st.cache(ignore_hash=True)
def process_text(model_name, text):
nlp = load_model(model_name)
return nlp(text)
st.sidebar.title("Interactive spaCy visualizer")
st.sidebar.markdown(
"""
Process text with [spaCy](https://spacy.io) models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
"""
)
spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state = st.info(f"Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()
text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)
if "parser" in nlp.pipe_names:
st.header("Dependency Parse & Part-of-speech tags")
st.sidebar.header("Dependency Parse")
split_sents = st.sidebar.checkbox("Split sentences", value=True)
collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
collapse_phrases = st.sidebar.checkbox("Collapse phrases")
compact = st.sidebar.checkbox("Compact mode")
options = {
"collapse_punct": collapse_punct,
"collapse_phrases": collapse_phrases,
"compact": compact,
}
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
for sent in docs:
html = displacy.render(sent, options=options)
# Double newlines seem to mess with the rendering
html = html.replace("\n\n", "\n")
if split_sents and len(docs) > 1:
st.markdown(f"> {sent.text}")
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
if "ner" in nlp.pipe_names:
st.header("Named Entities")
st.sidebar.header("Named Entities")
default_labels = ["PERSON", "ORG", "GPE", "LOC"]
labels = st.sidebar.multiselect(
"Entity labels", nlp.get_pipe("ner").labels, default_labels
)
html = displacy.render(doc, style="ent", options={"ents": labels})
# Newlines seem to mess with the rendering
html = html.replace("\n", " ")
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
if "entity_linker" in nlp.pipe_names:
attrs.append("kb_id_")
data = [
[str(getattr(ent, attr)) for attr in attrs]
for ent in doc.ents
if ent.label_ in labels
]
df = pd.DataFrame(data, columns=attrs)
st.dataframe(df)
if "textcat" in nlp.pipe_names:
st.header("Text Classification")
st.markdown(f"> {text}")
df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
st.dataframe(df)
vector_size = nlp.meta.get("vectors", {}).get("width", 0)
if vector_size:
st.header("Vectors & Similarity")
st.code(nlp.meta["vectors"])
text1 = st.text_input("Text or word 1", "apple")
text2 = st.text_input("Text or word 2", "orange")
doc1 = process_text(spacy_model, text1)
doc2 = process_text(spacy_model, text2)
similarity = doc1.similarity(doc2)
if similarity > 0.5:
st.success(similarity)
else:
st.error(similarity)
st.header("Token attributes")
if st.button("Show token attributes"):
attrs = [
"idx",
"text",
"lemma_",
"pos_",
"tag_",
"dep_",
"head",
"ent_type_",
"ent_iob_",
"shape_",
"is_alpha",
"is_ascii",
"is_digit",
"is_punct",
"like_num",
]
data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
df = pd.DataFrame(data, columns=attrs)
st.dataframe(df)
st.header("JSON Doc")
if st.button("Show JSON Doc"):
st.json(doc.to_json())
st.header("JSON model meta")
if st.button("Show JSON model meta"):
st.json(nlp.meta)
@e-tony

This comment has been minimized.

Copy link

e-tony commented Jun 4, 2020

Nice example!

The ignore_hash argument has been renamed to allow_output_mutation here. This should be changed in lines 12 and 17.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.