Skip to content

Instantly share code, notes, and snippets.

@Nikhel1
Created March 26, 2020 01:48
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save Nikhel1/b180a255b7fe974892d7d80233d7650e to your computer and use it in GitHub Desktop.
# Apply coreference resolution
import pandas as pd
import numpy as np
import spacy
from spacy import displacy
import neuralcoref
## For all articles
# Pipeline: load the article table, keep the recent articles, run
# neuralcoref over every article text, and write the original columns plus
# the coreference-resolved text ('text_coref') to a new CSV file.

# Articles with a positional index below this limit have their original and
# resolved text echoed to stdout for manual inspection.
PRINT_LIMIT = 13000

df = pd.read_csv('all_articlesv2.csv')

# Filtering on the label (class != 'notKnown') is currently disabled, so
# labelled and unlabelled articles are both processed.
# NOTE(review): the date filter compares against a string; this presumably
# relies on 'date' holding ISO-formatted (YYYY-MM-DD...) strings so that
# string ordering matches chronological ordering -- confirm.
df = df[df['date'] > '2020-01-15']

# 'class' is a Python keyword, so the column is exposed as 'clas' on the
# working frame; it is written back out under its original name below.
df_label = df.rename(columns={"class": "clas"})

nlp = spacy.load('en')
neuralcoref.add_to_pipe(nlp)

# Collect one plain dict per article and build the DataFrame once at the
# end; concatenating a one-row frame per iteration copies the whole
# accumulated frame every time (quadratic in the number of articles).
rows = []
for i, (_, row) in enumerate(df_label.iterrows()):
    article = row['text']
    doc = nlp(article)
    resolved = doc._.coref_resolved

    # NOTE(review): the indentation of the original script was lost; this
    # assumes only the debug prints are gated by the limit and that every
    # article is written to the output -- confirm.
    if i < PRINT_LIMIT:
        print('Article no.= ', i)
        print(article)
        print('++++++++++++++++++')
        print(resolved)
        print('---------------------------------------------------------')

    rows.append({
        # NOTE(review): 'Nr' carries the article text, not a number; kept
        # unchanged to preserve the output schema -- confirm intent.
        'Nr': article,
        'ids': row['ids'],
        'date': row['date'],
        'url': row['url'],
        'title': row['title'],
        'text': article,
        'text_coref': resolved,
        'country': row['country'],
        'months': row['months'],
        'class': row['clas'],
        'impact': row['impact'],
    })

# Column order follows the dict insertion order above. The index written as
# the first (unnamed) CSV column is a running row number 0..n-1.
coref_df = pd.DataFrame(rows)
coref_df.to_csv('all_articlesv1_all_coref_v2.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment