Skip to content

Instantly share code, notes, and snippets.

@AnnMarieW
Last active December 16, 2022 18:58
Show Gist options
  • Save AnnMarieW/6916be87b6e87250e998fc128b3f65cc to your computer and use it in GitHub Desktop.
Save AnnMarieW/6916be87b6e87250e998fc128b3f65cc to your computer and use it in GitHub Desktop.
spacy named entities app
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import base64
# Load spaCy's small English pipeline once, at import time.
nlp = spacy.load("en_core_web_sm")
# Default contents of the text area when the page first loads.
initial_text = "I went to Seattle"
# Longer alternative sample text, currently disabled:
#initial_text = """In ancient Rome, some neighbors live in three adjacent houses. In the center is the house of Senex, who lives there with wife Domina, son Hero, and several slaves, including head slave Hysterium and the musical's main character Pseudolus. A slave belonging to Hero, Pseudolus wishes to buy, win, or steal his freedom. One of the neighboring houses is owned by Marcus Lycus, who is a buyer and seller of beautiful women; the other belongs to the ancient Erronius, who is abroad searching for his long-lost children (stolen in infancy by pirates). One day, Senex and Domina go on a trip and leave Pseudolus in charge of Hero. Hero confides in Pseudolus that he is in love with the lovely Philia, one of the courtesans in the House of Lycus (albeit still a virgin)."""
# Dash application using the SPACELAB theme from dash-bootstrap-components.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])
def get_svg(svg: str, style: str = ""):
    """Embed SVG markup in an ``<img>`` tag as a base64 data URI.

    Args:
        svg: The SVG markup to embed.
        style: Text placed verbatim in the tag's ``style`` attribute.

    Returns:
        The ``<img .../>`` tag as a string.
    """
    encoded = str(base64.b64encode(bytes(svg, "utf-8")), "utf-8")
    return f'<img src="data:image/svg+xml;base64,{encoded}" style="{style}"/>'
# Page layout: banner, text input, and a card that shows the rendered parse.
app.layout = dbc.Container(
    [
        # Page banner.
        html.H3(
            "Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-4 mb-4",
        ),
        # Text entry; its "value" is the callback input.
        html.Div("Enter text to analyze"),
        dcc.Textarea(value=initial_text, id="user-input", className="w-100 mb-4"),
        # Output area; raw HTML is allowed so an <img> tag can be shown.
        html.H4("Dependency Parse and Part of Speech Tags"),
        dbc.Card(
            dcc.Markdown(id="html", dangerously_allow_html=True),
            body=True,
            className="mb-5",
        ),
    ],
)
@app.callback(
    Output("html", "children"), Input("user-input", "value")
)
def display(text):
    """Render a dependency parse image for every sentence in ``text``.

    Args:
        text: Current contents of the ``user-input`` text area (may be
            ``None`` or empty).

    Returns:
        A string of ``<img>`` tags, one per sentence, separated by blank
        lines, for the ``html`` Markdown component. Empty input yields an
        empty string.
    """
    # This callback has a single Output, so it must return a single value.
    if not text:
        return ""
    # new lines mess up the displacy renderer
    text = text.replace("\n", " ")
    doc = nlp(text)
    # Render each sentence separately and keep all of them, instead of
    # overwriting one variable and returning only a single image.
    # jupyter=False forces displaCy to return the markup as a string.
    return "\n\n".join(
        get_svg(displacy.render(sent, style="dep", jupyter=False))
        for sent in doc.sents
    )
# Start the development server only when this file is executed directly.
if __name__ == "__main__":
    # app.run replaces the legacy app.run_server entry point.
    app.run(debug=True)
import dash
from dash import Dash, dcc, html, Input, Output, State, MATCH
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
# Load the spaCy pipeline once at import time and create the Dash app.
nlp = spacy.load("en_core_web_sm")
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Single-column fluid page: banner, "add" button, and the card container.
app.layout = dbc.Container(
    dbc.Row(
        dbc.Col(
            [
                html.H3(
                    "Natural Language Processing with spaCy",
                    className="text-center bg-primary text-white p-2 mb-4",
                ),
                # Each click of this button triggers add_card.
                dbc.Button(
                    "Add Text Input area",
                    id="pattern-match-add-card",
                    n_clicks=0,
                    className="mb-3",
                ),
                # Starts empty; cards are appended to its children.
                html.Div(
                    id="pattern-match-container",
                    children=[],
                    className="mt-4",
                ),
            ]
        )
    ),
    fluid=True,
)
def get_entities(input_text):
    """Run spaCy on ``input_text`` and return displaCy "ent" markup.

    Args:
        input_text: The text to analyze; must be a string.

    Returns:
        Whatever ``displacy.render`` returns for the document's entities.
    """
    # new lines mess up the displacy renderer
    flattened = input_text.replace("\n", " ")
    # NOTE(review): doc.ents (the entity spans), not the whole doc, is passed
    # to displaCy, so presumably only the entities are rendered without the
    # surrounding text. Confirm this is intended.
    entity_spans = nlp(flattened).ents
    return displacy.render(entity_spans, style="ent")
def make_card(n_clicks):
    """Build one text-input card whose component ids are indexed by ``n_clicks``.

    Args:
        n_clicks: Click count of the "add" button; used as the pattern-matching
            index of every component in the card and, plus one, in the title.

    Returns:
        A ``dbc.Card`` with a header (title and delete button) and a row
        holding a text area next to a Markdown output.
    """
    # Delete button, pushed to the right edge of the header.
    delete_button = dbc.Button(
        "X",
        id={"type": "delete-card", "index": n_clicks},
        n_clicks=0,
        color="secondary",
    )
    header = dbc.CardHeader(
        [
            f"Text Input {n_clicks + 1} ",
            html.Div(delete_button, className="ms-auto"),
        ],
        className="hstack",
    )

    # Input on the left, rendered entities on the right.
    text_area = dcc.Textarea(
        id={"type": "text-input", "index": n_clicks},
        className="w-100",
    )
    entity_view = dcc.Markdown(
        id={"type": "entities", "index": n_clicks},
        dangerously_allow_html=True,
    )
    body = dbc.Row([dbc.Col(text_area), dbc.Col(entity_view)])

    return dbc.Card(
        [header, body],
        className="m-1",
        id={"type": "card", "index": n_clicks},
    )
@app.callback(
    Output("pattern-match-container", "children"),
    Input("pattern-match-add-card", "n_clicks"),
    State("pattern-match-container", "children"),
)
def add_card(n_clicks, cards):
    """Append a new card to the container's children.

    Args:
        n_clicks: Click count of the "add" button; becomes the new card's index.
        cards: The container's current list of children.

    Returns:
        The children list with the new card added at the end.
    """
    return [*cards, make_card(n_clicks)]
@app.callback(
    Output({"type": "card", "index": MATCH}, "style"),
    Input({"type": "delete-card", "index": MATCH}, "n_clicks"),
    prevent_initial_call=True,
)
def remove_card(_):
    """Hide the card whose delete button was clicked.

    The card is hidden through its style rather than removed from the
    container's children. The click count itself is ignored.
    """
    hidden_style = {"display": "none"}
    return hidden_style
@app.callback(
    Output({"type": "entities", "index": MATCH}, "children"),
    Input({"type": "text-input", "index": MATCH}, "value"),
)
def update_figure(text_input):
    """Refresh a card's entity output from the text area with the same index.

    Args:
        text_input: Current value of the matching text area, or ``None`` if
            nothing has been entered.

    Returns:
        The markup from ``get_entities``, or ``dash.no_update`` when there is
        no text yet.
    """
    return get_entities(text_input) if text_input is not None else dash.no_update
# Start the development server only when this file is executed directly.
if __name__ == "__main__":
    # app.run replaces the legacy app.run_server entry point.
    app.run(debug=True)
from dash import Dash, dash_table, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import pandas as pd
from spacy.displacy.render import DEFAULT_LABEL_COLORS
# Load the spaCy pipeline once at import time and create the Dash app.
nlp = spacy.load("en_core_web_sm")
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# One dropdown option per displaCy label; each option's label is a Div
# with that label's default displaCy color as its background.
options = [
    {
        "label": html.Div(
            [label],
            style={
                "background-color": color,
                "font-weight": "bold",
                "padding": 5,
                "border-radius": "0.35em",
            },
        ),
        "value": label,
    }
    for label, color in DEFAULT_LABEL_COLORS.items()
]

# Multi-select dropdown grouped with a "Select All" button.
label_dropdown = dbc.InputGroup(
    [
        dcc.Dropdown(
            options,
            multi=True,
            id="label-dropdown",
            style={"width": 400},
        ),
        dbc.Button("Select All", id="all"),
    ]
)

app.layout = dbc.Container(label_dropdown)
@app.callback(
    Output("label-dropdown", "value"),
    Input("all", "n_clicks"),
)
def select_all_labels(_):
    """Set the dropdown value to every label in ``DEFAULT_LABEL_COLORS``.

    The click count is ignored; it only serves as the trigger.
    """
    every_label = list(DEFAULT_LABEL_COLORS)
    return every_label
# Start the development server only when this file is executed directly.
if __name__ == "__main__":
    # app.run replaces the legacy app.run_server entry point.
    app.run(debug=True)
from dash import Dash, dash_table, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import spacy
from spacy import displacy
import pandas as pd
# Load the spaCy pipeline once at import time.
nlp = spacy.load("en_core_web_sm")
# Default contents of the text area.
initial_text = "I went to Seattle"
# Entity attributes read from each span; they double as table column ids.
NER_ATTRS = [
    "text",
    "start_char",
    "end_char",
    "label_",
]
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# Entity table: one column per attribute plus a "description" column,
# with native filtering and sorting and ten rows per page.
table = dash_table.DataTable(
    id="table",
    columns=[{"name": col, "id": col} for col in [*NER_ATTRS, "description"]],
    filter_action="native",
    sort_action="native",
    page_size=10,
    style_table={"overflowX": "auto"},
)
# Page layout: banner, text input, rendered entities, and the entity table.
app.layout = dbc.Container(
    [
        # Page banner.
        html.H3(
            "Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-4 mb-4",
        ),
        # Text entry; its "value" is the callback input.
        html.Div("Enter text to analyze"),
        dcc.Textarea(value=initial_text, id="user-input", className="w-100 mb-4"),
        # Output area; raw HTML is allowed so displaCy markup can be shown.
        html.H4("Named Entities"),
        dbc.Card(
            dcc.Markdown(id="html", dangerously_allow_html=True),
            body=True,
            className="mb-5",
        ),
        html.Div(table),
    ],
)
@app.callback(
    Output("html", "children"), Output("table", "data"), Input("user-input", "value")
)
def display(text):
    """Render the entities in ``text`` and build the rows for the entity table.

    Args:
        text: Current contents of the ``user-input`` text area, or ``None``.

    Returns:
        A ``(markup, rows)`` pair: the displaCy "ent" markup for the Markdown
        component, and a list with one dict per entity holding the
        ``NER_ATTRS`` values plus a ``description`` from ``spacy.explain``.
        ``rows`` is an empty list when no entities are found. Both items are
        ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return None, None
    # new lines mess up the displacy renderer
    text = text.replace("\n", " ")
    doc = nlp(text)
    # jupyter=False forces displaCy to return the markup as a string.
    markup = displacy.render(doc, style="ent", jupyter=False)
    # Keep native value types (start_char / end_char stay integers) so the
    # table's native sort and filter compare them as numbers, not as text.
    rows = [
        {
            **{attr: getattr(ent, attr) for attr in NER_ATTRS},
            "description": spacy.explain(ent.label_),
        }
        for ent in doc.ents
    ]
    return markup, rows
# Start the development server only when this file is executed directly.
if __name__ == "__main__":
    # app.run replaces the legacy app.run_server entry point.
    app.run(debug=True)
@AnnMarieW
Copy link
Author

AnnMarieW commented Dec 16, 2022

dependency parse
spacy-dependency-parse



named entities
named-entities



ent label dropdown

ent-label-dropdown



ent with table



spacy-ent-app


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment