Skip to content

Instantly share code, notes, and snippets.

@aniruddha27
Created June 5, 2020 21:03
Show Gist options
  • Save aniruddha27/a39e114707cdb0ba194da3125a0cc903 to your computer and use it in GitHub Desktop.
Save aniruddha27/a39e114707cdb0ba194da3125a0cc903 to your computer and use it in GitHub Desktop.
# rule to extract initiative name
def sent_subtree(text):
    """Extract names of schemes/initiatives mentioned in a sentence.

    The sentence is scanned for a keyword (plan, programme, scheme, campaign,
    initiative, conference, agreement, alliance) as a whole word, ignoring
    case. For every parsed token that is such a keyword, the proper nouns
    (``pos_ == 'PROPN'``) found in that token's subtree are concatenated into
    one name.

    Args:
        text: The sentence to analyse, as a string.

    Returns:
        A list of strings, one per keyword token whose subtree contains at
        least one proper noun. Each proper noun is followed by a single
        space, so every entry ends with a trailing space. The list is empty
        when the sentence contains no keyword.

    Note:
        Relies on a module-level ``nlp`` callable defined elsewhere
        (presumably a loaded spaCy pipeline -- confirm against the caller).
    """
    # The case-insensitive flag is passed as an argument: an inline "(?i)"
    # placed after "\b" is deprecated since Python 3.6 and raises re.error
    # on Python 3.11+ ("global flags not at the start of the expression").
    keyword_re = re.compile(
        r'\b(?:plan|programme|scheme|campaign|initiative'
        r'|conference|agreement|alliance)\b',
        re.IGNORECASE,
    )

    # Cheap pre-check first, so sentences with no keyword are never parsed.
    if keyword_re.search(text) is None:
        return []

    schemes = []
    # iterating over sentence tokens
    for token in nlp(text):
        if keyword_re.search(token.text) is None:
            continue
        # only the proper nouns of the keyword's subtree form the name
        word = ''.join(
            node.text + ' ' for node in token.subtree if node.pos_ == 'PROPN'
        )
        if word:
            schemes.append(word)
    return schemes
# derive initiatives: run sent_subtree on every entry of df2['Sent'] and store
# the list it returns in the 'Schemes2' column
# NOTE(review): df2 is defined elsewhere, presumably a pandas DataFrame -- confirm
df2['Schemes2'] = df2['Sent'].apply(sent_subtree)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment