Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save zahin-mohammad/75ddbca9783eac0afb78ac901fed8801 to your computer and use it in GitHub Desktop.
Save zahin-mohammad/75ddbca9783eac0afb78ac901fed8801 to your computer and use it in GitHub Desktop.
neuralcoref vs corenlp coref resolution
import os
import neuralcoref
import en_core_web_sm
from stanfordnlp.server import CoreNLPClient
# Location of the CoreNLP server. An unset *or empty* environment variable
# falls back to the local default because of the `or`.
CORENLP_HOST = os.environ.get('CORENLP_HOST') or 'localhost'
CORENLP_PORT = os.environ.get('CORENLP_PORT') or 9000
CORENLP_URL = f'http://{CORENLP_HOST}:{CORENLP_PORT}'

# Defaults handed to the client constructor below.
annotators = 'tokenize, ssplit, pos, lemma, ner, entitymentions, coref, sentiment, openie'
options = {'openie.resolve_coref': True}

# The client is pointed at CORENLP_URL with start_server=False, i.e. it is
# not asked to launch a server itself.
client = CoreNLPClient(
    endpoint=CORENLP_URL,
    start_server=False,
    annotators=annotators,
    options=options,
)

# spaCy English pipeline with the neuralcoref component added to it.
nlp = en_core_web_sm.load()
neuralcoref.add_to_pipe(nlp)
def spacy_coref(text: str):
    """Return *text* with coreferences resolved by the spaCy/neuralcoref pipeline.

    The text is run through the module-level ``nlp`` pipeline and the
    document's ``coref_resolved`` extension attribute is returned.
    """
    return nlp(text)._.coref_resolved
def corenlp_coref(text: str) -> str:
    """Return *text* with coreferent mentions replaced via CoreNLP.

    The text is annotated through the module-level ``client``. Every
    non-representative mention of a coreference chain is replaced by the
    text of the chain's representative mention, and the sentences are then
    re-assembled from their tokens.

    Args:
        text: Raw input text.

    Returns:
        The rewritten text. Tokens are joined with single spaces, with
        punctuation attached to the preceding token; the result keeps a
        trailing space after the last token.
    """
    ann = client.annotate(
        text,
        output_format="json",
        properties={
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,coref,openie',
            'openie.resolve_coref': 'true',
            # The key was previously misspelled as 'pinelineLanguage'.
            'pipelineLanguage': 'en',
        },
    )
    # ann.keys() ->
    # dict_keys(['sentences', 'corefs'])
    replacement_map = _build_replacement_map(ann['corefs'])

    rebuilt_sentences = []
    # sentNum in the coref mentions is 1-indexed, hence start=1.
    for sentence_number, sentence in enumerate(ann['sentences'], start=1):
        # sentence.keys() ->
        # dict_keys(['index', 'parse', 'basicDependencies', 'enhancedDependencies', 'enhancedPlusPlusDependencies', 'openie', 'entitymentions', 'tokens'])
        # Token indices restart in every sentence, so the "skip until" marker
        # must not leak from one sentence into the next.
        skip_until = None
        sub_text = ''
        for token in sentence['tokens']:
            # token.keys() ->
            # dict_keys(['index', 'word', 'originalText', 'lemma', 'characterOffsetBegin', 'characterOffsetEnd', 'pos', 'ner', 'speaker', 'before', 'after'])
            token_index = token['index']
            # Remaining tokens of a multi-word mention that was already replaced.
            if skip_until is not None and token_index < skip_until:
                continue
            replacement = replacement_map.get((sentence_number, token_index))
            if replacement is not None:
                skip_until, noun = replacement
                sub_text += f'{noun} '
                continue
            skip_until = None
            if not _is_word_token(token):
                # Punctuation: drop the space that follows the previous token.
                sub_text = sub_text[:-1]
            sub_text += f'{token["originalText"]} '
        rebuilt_sentences.append(sub_text)
    return ''.join(rebuilt_sentences)


def _build_replacement_map(corefs):
    """Map (sentence_number, mention_start_index) -> (mention_end_index, noun)."""
    replacement_map = {}
    for chain in corefs.values():
        # An empty chain, or one without a flagged representative, yields None.
        representative = next(
            (mention for mention in chain if mention['isRepresentativeMention']),
            None,
        )
        # Replacing mentions with a pronoun would not resolve anything.
        if representative is None or representative['type'] == 'PRONOMINAL':
            continue
        noun = representative['text']
        for mention in chain:
            if mention is representative:
                continue
            # mention.keys() ->
            # dict_keys(['id', 'text', 'type', 'number', 'gender', 'animacy', 'startIndex', 'endIndex', 'headIndex', 'sentNum', 'position', 'isRepresentativeMention'])
            key = (mention['sentNum'], mention['startIndex'])
            replacement_map[key] = (mention['endIndex'], noun)
    return replacement_map


def _is_word_token(token) -> bool:
    """Return True when *token* is a word rather than punctuation."""
    # Word tags are alphanumeric apart from the trailing '$' of the possessive
    # tags (PRP$, WP$); a bare isalnum() misclassified those as punctuation.
    # Punctuation tags such as '.', ',' or '$' still come out False.
    return token['pos'].replace('$', '').isalnum()
# Sample inputs used to compare the two coreference resolvers side by side.
examples = [
    'Barack was born in Hawaii. His wife Michelle was born in Milan. He says that she is very smart.',
    'My sister has a friend called Barack Obama. Really, tell me more about him? She thinks he is so funny!',
    'The dog chased the cat. But it escaped.',
    'X and Y are neighbours. She admires him because he works hard.',
    'John and Mary are neighbours. She admires him because he works hard.',
    'Mary and Julie are sisters. They love chocolates.',
    'My brother has a dog and he loves her.',
    'My sister has a dog and she loves her.',
    'My sister has a dog and she loves him. He is cute.',
    'My sister has a dog. She loves him.',
    'My sister has a dog and she loves him.',
]

# For each example print the original text followed by both resolutions,
# each section terminated by a blank line.
for example in examples:
    print('#########################################################################################################')
    print(f'ORIGINAL: {example}', end='\n\n')
    print('spacy_coref:')
    print(spacy_coref(example), end='\n\n')
    print('corenlp_coref')
    print(corenlp_coref(example), end='\n\n')
'''
#########################################################################################################
ORIGINAL: Barack was born in Hawaii. His wife Michelle was born in Milan. He says that she is very smart.
spacy_coref:
Barack was born in Hawaii. Barack wife Michelle was born in Milan. Barack says that His wife Michelle is very smart.
corenlp_coref
Barack was born in Hawaii. Barack wife Michelle was born in Milan. Barack says that His wife Michelle is very smart.
#########################################################################################################
ORIGINAL: My sister has a friend called Barack Obama. Really, tell me more about him? She thinks he is so funny!
spacy_coref:
My sister has a friend called Barack Obama. Really, tell me more about a friend called Barack Obama? My sister thinks a friend called Barack Obama is so funny!
corenlp_coref
My sister has a friend called Barack Obama. Really, tell me more about Barack Obama? My sister thinks Barack Obama is so funny!
#########################################################################################################
ORIGINAL: The dog chased the cat. But it escaped.
spacy_coref:
The dog chased the cat. But The dog escaped.
corenlp_coref
The dog chased the cat. But The dog escaped.
#########################################################################################################
ORIGINAL: X and Y are neighbours. She admires him because he works hard.
spacy_coref:
X and Y are neighbours. She admires him because him works hard.
corenlp_coref
X and Y are neighbours. She admires him because he works hard.
#########################################################################################################
ORIGINAL: John and Mary are neighbours. She admires him because he works hard.
spacy_coref:
John and Mary are neighbours. Mary admires John because John works hard.
corenlp_coref
John and Mary are neighbours. Mary admires John because John works hard.
#########################################################################################################
ORIGINAL: Mary and Julie are sisters. They love chocolates.
spacy_coref:
Mary and Julie are sisters. Mary and Julie love chocolates.
corenlp_coref
Mary and Julie are sisters. They love chocolates.
#########################################################################################################
ORIGINAL: My brother has a dog and he loves her.
spacy_coref:
My brother has a dog and My brother loves My brother.
corenlp_coref
My brother has a dog and My brother loves a dog.
#########################################################################################################
ORIGINAL: My sister has a dog and she loves her.
spacy_coref:
My sister has a dog and My sister loves My sister.
corenlp_coref
My sister has a dog and My sister loves My sister.
#########################################################################################################
ORIGINAL: My sister has a dog and she loves him. He is cute.
spacy_coref:
My sister has a dog and My sister loves a dog. a dog is cute.
corenlp_coref
My sister has a dog and My sister loves him. He is cute.
#########################################################################################################
ORIGINAL: My sister has a dog. She loves him.
spacy_coref:
My sister has a dog. My sister loves a dog.
corenlp_coref
My sister has a dog. My sister loves him.
#########################################################################################################
ORIGINAL: My sister has a dog and she loves him.
spacy_coref:
My sister has a dog and My sister loves a dog.
corenlp_coref
My sister has a dog and My sister loves him.
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment