Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# to extract initiatives using pattern matching
def all_schemes(text, check):
    """Extract scheme/initiative names from ``text`` with a spaCy ``Matcher``.

    A name is matched as: a determiner, two proper nouns tagged as compounds,
    up to three further proper nouns, and then one or more keyword tokens
    (e.g. "programme", "scheme", "alliance").

    Args:
        text: Sentence to search; it is passed to the module-level ``nlp``
            pipeline.
        check: Flag for the sentence. When it equals 0 the sentence is not
            parsed and an empty list is returned.

    Returns:
        A list of matched names as strings, with the leading determiner
        dropped. When a new match contains the most recently stored name,
        it replaces that entry instead of being appended.
    """
    # Bail out before parsing: running the pipeline is the expensive step
    # and is pointless for sentences flagged to be skipped.
    if check == 0:
        return []

    doc = nlp(text)

    # keywords that terminate an initiative name
    prog_list = ['programme', 'scheme',
                 'initiative', 'campaign',
                 'agreement', 'conference',
                 'alliance', 'plan']

    # pattern to match initiative names
    pattern = [{'POS': 'DET'},
               {'POS': 'PROPN', 'DEP': 'compound'},
               {'POS': 'PROPN', 'DEP': 'compound'},
               {'POS': 'PROPN', 'OP': '?'},
               {'POS': 'PROPN', 'OP': '?'},
               {'POS': 'PROPN', 'OP': '?'},
               {'LOWER': {'IN': prog_list}, 'OP': '+'}]

    matcher = Matcher(nlp.vocab)
    # NOTE(review): this is the spaCy v2 call signature (key, on_match,
    # *patterns); spaCy v3 expects matcher.add("matching", [pattern]).
    # Confirm against the installed spaCy version before changing it.
    matcher.add("matching", None, pattern)

    schemes = []
    # each match is a (match_id, start, end) triple of token indices
    for _, start, end in matcher(doc):
        # drop the leading determiner from the reported name
        if doc[start].pos_ == 'DET':
            start += 1

        span = str(doc[start:end])

        # A match that contains the previously stored name supersedes it
        # (presumably the optional/repeating operators yield overlapping
        # matches of increasing length); otherwise it is a new name.
        if schemes and schemes[-1] in span:
            schemes[-1] = span
        else:
            schemes.append(span)

    return schemes
# build the Schemes1 column by running all_schemes on each row's
# sentence (Sent) together with its flag (Check_Schemes)
df2['Schemes1'] = df2.apply(
    lambda row: all_schemes(row.Sent, row.Check_Schemes),
    axis=1,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment