Skip to content

Instantly share code, notes, and snippets.

@chiefastro
Created November 30, 2020 03:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chiefastro/9066672e5fe54a171a29c51a97385333 to your computer and use it in GitHub Desktop.
Save chiefastro/9066672e5fe54a171a29c51a97385333 to your computer and use it in GitHub Desktop.
Display token metadata with spaCy
import pandas as pd
import spacy
from spacy import displacy
# Load spaCy's small pre-trained English pipeline.
nlp = spacy.load('en_core_web_sm')

# Example sentence used to illustrate the grammar rules.
text = "He does not eat meat, but he loves Beyond Burgers."

# Run the sentence through the pipeline to get an annotated document.
doc = nlp(text)
# collect token meta into df
def collect_token_meta(
    # Quoted so the hint is not evaluated when the function is defined.
    doc: "spacy.tokens.Doc"
) -> pd.DataFrame:
    """Collect useful token metadata into a convenient dataframe.

    Args:
        doc: A parsed spaCy document. The function only iterates over it
            and reads the token attributes listed below, so any iterable
            of objects exposing those attributes is handled the same way.

    Returns:
        A DataFrame with one row per token, in iteration order, and these
        columns:

        * ``text``, ``lemma``, ``ent``, ``pos``, ``tag``, ``dep`` -- the
          token's ``text``, ``lemma_``, ``ent_type_``, ``pos_``, ``tag_``
          and ``dep_`` attributes.
        * ``ancestors``, ``children``, ``subtree``, ``lefts``, ``rights`` --
          lists holding the ``text`` of every token yielded by the
          attribute of the same name.

        All columns are present even when ``doc`` contains no tokens.
    """
    # Fixing the column order up front keeps the schema stable: without it
    # an empty document would produce a frame with no columns at all.
    columns = [
        'text',
        'lemma',
        'ent',
        'pos',
        'tag',
        'dep',
        'ancestors',
        'children',
        'subtree',
        'lefts',
        'rights',
    ]
    token_meta = [
        {
            'text': t.text,
            'lemma': t.lemma_,
            'ent': t.ent_type_,
            'pos': t.pos_,
            'tag': t.tag_,
            'dep': t.dep_,
            # The related-token attributes are materialised into lists of
            # plain strings so each cell holds a self-contained value.
            'ancestors': [tt.text for tt in t.ancestors],
            'children': [tt.text for tt in t.children],
            'subtree': [tt.text for tt in t.subtree],
            'lefts': [tt.text for tt in t.lefts],
            'rights': [tt.text for tt in t.rights],
        }
        for t in doc
    ]
    return pd.DataFrame(token_meta, columns=columns)
# `display` is injected by IPython/Jupyter for rich table rendering; it does
# not exist in a plain Python interpreter, so fall back to `print` there
# instead of failing with a NameError.
try:
    show = display
except NameError:
    show = print
show(collect_token_meta(doc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment