# aniruddha27/nlp_ie_9.py

## nlp_ie_9.py
# function to extract "Prime Minister ..." phrases from a sentence,
# discarding "Prime Minister of <X>" when <X> is not India
def find_names(text):
    """Extract "Prime Minister ..." phrases that may refer to a PM of India.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and
    scanned with a ``Matcher`` for the token pattern
    ``prime minister [<adposition>] <proper noun>`` (the first two words are
    matched case-insensitively).  A matched phrase is discarded only when it
    reads "prime minister of X" with X different from "India"; phrases
    without "of" (e.g. "Prime Minister Nehru") are kept.

    Args:
        text: The sentence to search, as a string.

    Returns:
        A list with the text of every retained match, in match order.
    """
    # spacy doc
    doc = nlp(text)

    # "prime minister", an optional adposition (e.g. "of"), then a proper noun
    pattern = [{'LOWER': 'prime'},
               {'LOWER': 'minister'},
               {'POS': 'ADP', 'OP': '?'},
               {'POS': 'PROPN'}]

    # Matcher class object
    matcher = Matcher(nlp.vocab)
    # NOTE(review): this is the spaCy v2 call form; spaCy v3 expects
    # matcher.add("names", [pattern]) -- confirm the installed version.
    matcher.add("names", None, pattern)

    names = []
    # Each match is a (match_id, start, end) triple of token offsets.
    for _, start, end in matcher(doc):
        phrase = str(doc[start:end])
        words = phrase.split()

        # Build a fresh list instead of removing from `names` while looping
        # over it: in-place removal makes the iterator skip the element that
        # follows each removed one.  The length guard avoids an IndexError
        # when the phrase has no word after "of".
        is_foreign_pm = (len(words) > 3
                         and words[2] == 'of'
                         and words[3] != "India")
        if not is_foreign_pm:
            names.append(phrase)

    return names

# Apply find_names to every entry of df2['Sent'] and store the returned
# list of matched phrases in a new 'PM_Names' column.
# NOTE(review): assumes df2 is a pandas DataFrame defined earlier -- confirm.
df2['PM_Names'] = df2['Sent'].apply(find_names)
	# function to extract "Prime Minister ..." phrases that may refer to PMs of India
def find_names(text):
    """Extract "Prime Minister ..." phrases that may refer to a PM of India.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and
    scanned with a ``Matcher`` for the token pattern
    ``prime minister [<adposition>] <proper noun>`` (the first two words are
    matched case-insensitively).  A matched phrase is discarded only when it
    reads "prime minister of X" with X different from "India"; phrases
    without "of" (e.g. "Prime Minister Nehru") are kept.

    Args:
        text: The sentence to search, as a string.

    Returns:
        A list with the text of every retained match, in match order.
    """
    # spacy doc
    doc = nlp(text)

    # "prime minister", an optional adposition (e.g. "of"), then a proper noun
    pattern = [{'LOWER': 'prime'},
               {'LOWER': 'minister'},
               {'POS': 'ADP', 'OP': '?'},
               {'POS': 'PROPN'}]

    # Matcher class object
    matcher = Matcher(nlp.vocab)
    # NOTE(review): this is the spaCy v2 call form; spaCy v3 expects
    # matcher.add("names", [pattern]) -- confirm the installed version.
    matcher.add("names", None, pattern)

    names = []
    # Each match is a (match_id, start, end) triple of token offsets.
    for _, start, end in matcher(doc):
        phrase = str(doc[start:end])
        words = phrase.split()

        # Build a fresh list instead of removing from `names` while looping
        # over it: in-place removal makes the iterator skip the element that
        # follows each removed one.  The length guard avoids an IndexError
        # when the phrase has no word after "of".
        is_foreign_pm = (len(words) > 3
                         and words[2] == 'of'
                         and words[3] != "India")
        if not is_foreign_pm:
            names.append(phrase)

    return names

	# apply function to every entry of df2['Sent']; the returned lists of
# matched phrases are stored in a new 'PM_Names' column.
# NOTE(review): assumes df2 is a pandas DataFrame defined earlier -- confirm.
df2['PM_Names'] = df2['Sent'].apply(find_names)