Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# Apply coreference resolution
import pandas as pd
import numpy as np
import spacy
from spacy import displacy
import neuralcoref
## For all articles
# Build `coref_df`: one row per article, carrying the original metadata plus a
# coreference-resolved copy of the article text ('text_coref').
df = pd.read_csv('all_articlesv2.csv')

# Keep only articles dated after 2020-01-15.
# NOTE(review): this compares the 'date' column against a string, so it is
# only chronological if the column holds ISO-formatted (YYYY-MM-DD...) strings
# -- TODO confirm against the CSV.
df = df[df['date'] > '2020-01-15']

# The label filter (df['class'] != 'notKnown') is currently disabled, so every
# remaining article is processed.
# "class" is a Python keyword, so the column is exposed as "clas"; the renamed
# frame is kept under the name `df_label` in case later code relies on it.
df_label = df.rename(columns={"class": "clas"})

nlp = spacy.load('en')
# NeuralCoref must be attached to the pipeline: without this call the
# `doc._.coref_resolved` extension read below is not registered and accessing
# it raises AttributeError.
neuralcoref.add_to_pipe(nlp)

# Only the first `max_articles` articles are written to the output frame.
# NOTE(review): the source this script was recovered from had lost its
# indentation; the prints AND the row construction are assumed to sit under
# the `i < max_articles` guard -- confirm against the original notebook.
max_articles = 13000

# Collect one single-row frame per article and concatenate once at the end;
# concatenating onto a growing frame inside the loop copies all previous rows
# on every iteration.
coref_frames = []
for i, (_, row) in enumerate(df_label.iterrows()):
    article = row['text']
    doc = nlp(article)
    if i < max_articles:
        print ('Article no.= ', i)
        print ('++++++++++++++++++')
        print ('---------------------------------------------------------')
        coref_frames.append(pd.DataFrame([{
            # NOTE(review): 'Nr' is filled with the article text, same as
            # 'text'; it looks like it may have been meant to hold the
            # article number -- left unchanged, confirm intent.
            'Nr': row['text'],
            'ids': row['ids'],
            'date': row['date'],
            'url': row['url'],
            'title': row['title'],
            "text": row['text'],
            # Article text with coreferent mentions resolved by NeuralCoref.
            'text_coref': doc._.coref_resolved,
            'country': row['country'],
            'months': row['months'],
            'class': row['clas'],
            'impact': row['impact'],
        }]))

# pd.concat raises on an empty list, so fall back to an empty frame when no
# article passed the filters (matches the original's initial empty DataFrame).
coref_df = pd.concat(coref_frames) if coref_frames else pd.DataFrame()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment