Skip to content

Instantly share code, notes, and snippets.

@chssch
Last active March 1, 2023 18:01
Show Gist options
  • Save chssch/f788cfd227cb94d0843235a2542026fd to your computer and use it in GitHub Desktop.
Save chssch/f788cfd227cb94d0843235a2542026fd to your computer and use it in GitHub Desktop.
Use AllenNLP Semantic Role Labeling (http://allennlp.org/) with SpaCy 2.0 (http://spacy.io) components and extensions
# This small script shows how to use AllenNLP Semantic Role Labeling (http://allennlp.org/) with SpaCy 2.0 (http://spacy.io) components and extensions
# Script installs allennlp default model
# Important: Install allennlp from source and replace the spacy requirement with spacy-nightly in the requirements.txt
# Developed for SpaCy 2.0.0a18
from allennlp.commands import DEFAULT_MODELS
from allennlp.common.file_utils import cached_path
from allennlp.service.predictors import SemanticRoleLabelerPredictor
from allennlp.models.archival import load_archive
import spacy
from spacy.tokens import Token
class SRLComponent(object):
    """SpaCy pipeline component that attaches AllenNLP SRL arguments to verbs.

    For every token whose ``pos_`` is ``"VERB"`` the AllenNLP semantic role
    labeler is run with that token marked as the predicate. The ARG0 and ARG1
    spans found in the predicted tags are stored on the verb token as the
    custom attributes ``token._.srl_arg0`` and ``token._.srl_arg1``.
    """

    name = 'Semantic Role Labeler'

    def __init__(self):
        """Load the default SRL model and register the token extensions."""
        archive = load_archive(self._get_srl_model())
        self.predictor = SemanticRoleLabelerPredictor.from_archive(archive, "semantic-role-labeling")
        # NOTE(review): written against SpaCy 2.0.0a18; released spaCy versions
        # appear to require a default/getter/method argument here -- confirm
        # against the installed version before changing these calls.
        Token.set_extension('srl_arg0')
        Token.set_extension('srl_arg1')

    def __call__(self, doc):
        """Annotate every verb in ``doc`` with its ARG0 / ARG1 spans.

        Args:
            doc: The tokenized document handed over by the pipeline. It is
                iterated, measured with ``len`` and sliced.

        Returns:
            The same ``doc`` object, so the pipeline can continue.
        """
        # See https://github.com/allenai/allennlp/blob/master/allennlp/service/predictors/semantic_role_labeler.py#L74
        token_count = len(doc)
        for position, token in enumerate(doc):
            if token.pos_ != "VERB":
                continue
            # One-hot indicator telling the model which token is the predicate.
            verb_labels = [0] * token_count
            verb_labels[position] = 1
            instance = self.predictor._dataset_reader.text_to_instance(doc, verb_labels)
            # -1 is passed through unchanged (presumably the CPU device index;
            # confirm against the AllenNLP version in use).
            output = self.predictor._model.forward_on_instance(instance, -1)
            tags = output['tags']
            for role, attribute in (("ARG0", "srl_arg0"), ("ARG1", "srl_arg1")):
                bounds = self._argument_bounds(tags, role)
                if bounds is not None:
                    start, end = bounds
                    token._.set(attribute, doc[start:end])
        return doc

    @staticmethod
    def _argument_bounds(tags, role):
        """Return ``(start, end)`` of the first ``role`` span in ``tags``.

        The span starts at the first ``B-<role>`` tag and covers only the
        ``I-<role>`` tags that follow it directly, so a later, separate span
        with the same role cannot stretch the result over unrelated tokens.

        Args:
            tags: List of tag strings, one per token.
            role: Role name without prefix, e.g. ``"ARG0"``.

        Returns:
            A ``(start, end)`` pair with ``end`` exclusive, or ``None`` when
            ``tags`` contains no ``B-<role>`` tag.
        """
        begin_tag = "B-" + role
        inside_tag = "I-" + role
        if begin_tag not in tags:
            return None
        start = tags.index(begin_tag)
        end = start + 1
        while end < len(tags) and tags[end] == inside_tag:
            end += 1
        return start, end

    def _get_srl_model(self):
        """Return the location of the default SRL model resolved by ``cached_path``."""
        return cached_path(DEFAULT_MODELS['semantic-role-labeling'])
def demo():
    """Run the SRL component on a sample sentence and print one triple per verb.

    Loads the spaCy "en" pipeline, inserts the SRL component after the 'ner'
    step and prints ``(arg0, verb, arg1)`` for every verb in the sentence.
    """
    pipeline = spacy.load("en")
    pipeline.add_pipe(SRLComponent(), after='ner')
    parsed = pipeline("Apple sold 1 million Plumbuses this month.")
    template = "('{}', '{}', '{}')"
    for token in (t for t in parsed if t.pos_ == "VERB"):
        print(template.format(token._.srl_arg0, token, token._.srl_arg1))
    # Expected output: ('Apple', 'sold', '1 million Plumbuses')
@lan2720
Copy link

lan2720 commented Mar 19, 2018

@richliao
Copy link

richliao commented Jun 7, 2018

I'm getting "Maximum recursion depth exceeded" error in the statement of
archive = load_archive(args.archive_file,
weights_file=None,
cuda_device=args.cuda_device,
overrides="")
I'm running on a Mac that doesn't have cuda_device. Will it be the problem?

@vkhetan
Copy link

vkhetan commented Mar 6, 2019

https://gist.github.com/lan2720/b83f4b3e2a5375050792c4fc2b0c8ece
I wrote this one, and it works well.

I am getting a maximum recursion depth error. I changed some parts to match the current allennlp library, but I can't get rid of the recursion error. Any pointers?

@danyaljj
Copy link

What AllenNLP version is used here?

@narayanacharya6
Copy link

narayanacharya6 commented May 6, 2021

If you want to use newer versions of allennlp (2.4.0), allennlp-models (2.4.0) and spacy (3.0.6) for this, below might be a good starting point:

from allennlp.predictors.predictor import Predictor
from spacy.language import Language
from spacy.tokens import Doc


@Language.factory(
    "srl",
    default_config={
        "model_path": "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz",
    },
)
def create_srl_component(nlp: Language, name: str, model_path: str):
    """Factory registered under the name "srl" that builds the SRL component.

    Args:
        nlp: The pipeline the component is being added to.
        name: The component name supplied by the caller (unused here).
        model_path: Location of the AllenNLP model archive to load.

    Returns:
        A new ``SRLComponent`` built from ``nlp`` and ``model_path``.
    """
    component = SRLComponent(nlp, model_path)
    return component


class SRLComponent:
    """Pipeline component that stores AllenNLP predictions on ``doc._.srl``."""

    def __init__(self, nlp: Language, model_path: str):
        """Register the ``srl`` extension if needed and load the predictor.

        Args:
            nlp: The pipeline this component belongs to (not used here).
            model_path: Location passed to ``Predictor.from_path``.
        """
        already_registered = Doc.has_extension("srl")
        if not already_registered:
            # Registering an existing extension again is avoided by this guard.
            Doc.set_extension("srl", default=None)
        self.predictor = Predictor.from_path(model_path)

    def __call__(self, doc: Doc):
        """Run the predictor on the document text and attach its output.

        Args:
            doc: The document to annotate.

        Returns:
            The same ``doc``, with the prediction stored in ``doc._.srl``.
        """
        doc._.srl = self.predictor.predict(sentence=doc.text)
        return doc


if __name__ == '__main__':
    # Smoke run: build a blank English pipeline, add the "srl" component and
    # print the predictions stored on the document.
    import spacy

    sample_text = "The dog trashed the apartment in under 30 seconds."
    pipeline = spacy.blank('en')
    pipeline.add_pipe("srl")
    print(pipeline(sample_text)._.srl)

@felgaet
Copy link

felgaet commented Jul 16, 2021

Hello @narayanacharya6,
Is there a quick way to print the result of the semantic role labelling in a file that respects the CoNLL format?
What I would like to do is convert "doc._.srl" to CoNLL format.
Thank you

@narayanacharya6
Copy link

@felgaet I've used this previously for converting docs to conll - https://github.com/BramVanroy/spacy_conll
I don't know if this is exactly what you are looking for but might be a starting point to where you want to get.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment