# prateekjoshi565/kg_get_entities.py

## kg_get_entities.py
def get_entities(sent):
  """Extract a subject entity and an object entity from one sentence.

  The tokens are scanned left to right. Compound words and modifiers are
  remembered as they are seen and attached to the next subject or object
  token. When several subjects (or objects) occur, the last one wins.

  Args:
    sent: Either the sentence text, which is parsed with the module-level
      ``nlp`` callable, or an already-parsed iterable of tokens. Each token
      must expose ``text`` and ``dep_`` (dependency label) attributes.

  Returns:
    A two-item list ``[ent1, ent2]``: the subject entity and the object
    entity, each as a single-space-separated string. An entity that was not
    found is returned as ``""``.
  """
  # Accept pre-parsed tokens so callers can reuse an existing parse.
  doc = nlp(sent) if isinstance(sent, str) else sent

  ent1 = ""
  ent2 = ""

  prv_tok_dep = ""    # dependency tag of the previous non-punctuation token
  prv_tok_text = ""   # text of the previous non-punctuation token

  prefix = ""         # pending compound word(s) for the next entity
  modifier = ""       # pending modifier for the next entity

  for tok in doc:
    # Punctuation carries no entity information; it is skipped entirely, so
    # it does not become the "previous token" either.
    if tok.dep_ == "punct":
      continue

    # Compound word: remember it, glued to a directly preceding compound.
    if tok.dep_ == "compound":
      prefix = tok.text
      if prv_tok_dep == "compound":
        prefix = prv_tok_text + " " + tok.text

    # Modifier (any label ending in "mod"): remember it, glued to a directly
    # preceding compound.
    if tok.dep_.endswith("mod"):
      modifier = tok.text
      if prv_tok_dep == "compound":
        modifier = prv_tok_text + " " + tok.text

    # Substring membership instead of `find(...) == True`: the latter only
    # matched when the substring started at index 1 (e.g. "nsubj") and
    # missed labels where it starts elsewhere (e.g. "obj").
    if "subj" in tok.dep_:
      # Join only the non-empty parts so no double spaces end up inside the
      # entity when the modifier or prefix is missing.
      ent1 = " ".join(part for part in (modifier, prefix, tok.text) if part)
      prefix = ""
      modifier = ""

    if "obj" in tok.dep_:
      ent2 = " ".join(part for part in (modifier, prefix, tok.text) if part)
      # Clear the pending words so they do not leak into a later object.
      prefix = ""
      modifier = ""

    prv_tok_dep = tok.dep_
    prv_tok_text = tok.text

  return [ent1.strip(), ent2.strip()]