Skip to content

Instantly share code, notes, and snippets.

@ahalterman
Created March 13, 2018 20:21
Show Gist options
  • Save ahalterman/12610100936eadeda92585dad617ea94 to your computer and use it in GitHub Desktop.
Save ahalterman/12610100936eadeda92585dad617ea94 to your computer and use it in GitHub Desktop.
Event Data in 30 Lines of Python
import json

import spacy
# Load the spaCy pipeline used to parse every article.
nlp = spacy.load("en_core_web_lg")

# scraped.json is read as a list of records; only each record's 'body'
# field is kept. The encoding is given explicitly so decoding does not
# depend on the platform's default locale.
with open("scraped.json", "r", encoding="utf-8") as f:
    news = json.load(f)
news = [i['body'] for i in news]

# Run all article bodies through the pipeline and keep the parsed docs.
processed_docs = list(nlp.pipe(news))

# Lemmas that identify an event: a root verb from verb_list whose direct
# object is one of dobj_list (e.g. "launch" + "attack").
verb_list = ["launch", "begin", "initiate", "start"]
dobj_list = ["attack", "offensive", "operation", "assault"]
def detect_event(doc, verb_list, dobj_list):
    """Return the first root verb in *doc* that has a matching direct object.

    A token qualifies when its dependency label (``dep_``) is ``"ROOT"``,
    its lemma (``lemma_``) is in *verb_list*, and at least one of its
    immediate children is labelled ``"dobj"`` with a lemma in *dobj_list*.

    Args:
        doc: Iterable of tokens exposing ``dep_``, ``lemma_`` and
            ``children``.
        verb_list: Collection of verb lemmas that signal an event.
        dobj_list: Collection of direct-object lemmas that signal an event.

    Returns:
        The first qualifying root token, or ``None`` when there is none.
    """
    for word in doc:
        # Guard clause: skip anything that is not a root verb of interest.
        if word.dep_ != "ROOT" or word.lemma_ not in verb_list:
            continue
        if any(
            subword.dep_ == "dobj" and subword.lemma_ in dobj_list
            for subword in word.children
        ):
            return word
    return None
def actor_extractor(root):
    """Return the text of the subject phrase attached to *root*.

    The first immediate child of *root* labelled ``"nsubj"`` is taken as
    the subject. The result is the concatenation of ``text_with_ws`` for
    every token in that child's ``subtree``, with surrounding whitespace
    stripped.

    Args:
        root: Token exposing ``children``; each child exposes ``dep_`` and
            ``subtree``.

    Returns:
        The subject phrase as a string, or ``None`` when *root* has no
        ``"nsubj"`` child.
    """
    for child in root.children:
        if child.dep_ == "nsubj":
            return ''.join(w.text_with_ws for w in child.subtree).strip()
    return None
# Report the actor and root verb for every document that contains an event.
for doc in processed_docs:
    root = detect_event(doc, verb_list, dobj_list)
    # Compare against None explicitly: detect_event returns a token or None,
    # and a token's truthiness is not a reliable "was something found" test.
    if root is not None:
        actor = actor_extractor(root)
        # An event whose root verb has no subject is silently skipped.
        if actor:
            print("actor: ", actor, "root: ", root)
    # NOTE(review): indentation was lost in the source; this else is paired
    # with the root check because its message refers to the event — confirm.
    else:
        print("No event detected")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment