Skip to content

Instantly share code, notes, and snippets.

@aniruddha27 aniruddha27/nlp_ie_9.py
Last active Jun 5, 2020

Embed
What would you like to do?
def find_names(text):
    """Return "Prime Minister ..." phrases found in *text*.

    The text is run through the module-level ``nlp`` pipeline and a
    ``Matcher`` built on ``nlp.vocab`` (both are defined outside this
    function; presumably a loaded spaCy pipeline and
    ``spacy.matcher.Matcher``). A phrase matches when it consists of the
    tokens "prime" and "minister" (case-insensitive), an optional
    adposition, and a proper noun -- e.g. "Prime Minister Nehru" or
    "Prime Minister of India".

    Matches of the form "Prime Minister of <X>" are dropped unless <X> is
    exactly "India"; every other match is kept.

    Args:
        text: The raw string to search.

    Returns:
        A list of matched phrases as strings, in the order the matcher
        reported them.
    """
    # spacy doc
    doc = nlp(text)

    # pattern: "prime minister", an optional adposition, then a proper noun
    pattern = [
        {'LOWER': 'prime'},
        {'LOWER': 'minister'},
        {'POS': 'ADP', 'OP': '?'},
        {'POS': 'PROPN'},
    ]

    # Matcher class object
    matcher = Matcher(nlp.vocab)
    # Patterns are passed as a list: this form is accepted by spaCy >= 2.2.2
    # and is the only form accepted by spaCy 3, where the older
    # ``add(key, None, pattern)`` call fails.
    matcher.add("names", [pattern])

    names = []
    # each match is a (match_id, start, end) triple of token offsets
    for _match_id, start, end in matcher(doc):
        phrase = str(doc[start:end])
        words = phrase.split()
        # Only keep phrases about Indian PMs. Filtering while collecting
        # (rather than removing from the list during iteration) ensures no
        # entry is skipped, and the length check guards the index lookups.
        names_other_country = (
            len(words) > 3 and words[2] == 'of' and words[3] != "India"
        )
        if not names_other_country:
            names.append(phrase)
    return names
# Apply find_names to every entry of df2['Speech_clean'] and store each
# returned list of matched phrases in the 'PM_Names' column.
# NOTE(review): df2 is defined elsewhere -- presumably a pandas DataFrame
# whose 'Speech_clean' column holds speech text; confirm.
df2['PM_Names'] = df2['Speech_clean'].apply(find_names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.