Last active
January 28, 2019 08:21
-
-
Save Aunsiels/a89c4f794a0b7def0d4b50ecdfc8b249 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def process_marked(marked, doc):
    """Rebuild the text of ``doc`` with the auxiliary moved in front of ``marked``.

    The auxiliary is ``doc[1].text``; when ``doc[2]`` carries the ``neg``
    dependency, " not" is appended to it and the negation token is skipped.
    Tokens from the start index onward are copied in order (a literal "?"
    is dropped), and the auxiliary is inserted exactly once: either right
    before ``marked`` itself, or before the first qualifying modifier that
    sits beneath ``marked`` in the dependency tree.

    Args:
        marked: The token in front of which the auxiliary should be placed.
            Expected to expose ``tag_`` and ``i`` and to be comparable with
            the tokens of ``doc`` (assumed to be a spaCy token - confirm
            against the caller).
        doc: An indexable, sliceable token sequence whose items expose
            ``text``, ``dep_`` and ``ancestors``.

    Returns:
        The space-joined rewritten text, or "" when no insertion point was
        found or ``doc`` has fewer than three tokens.
    """
    # doc[1] and doc[2] are both read below; shorter inputs cannot be handled.
    if len(doc) < 3:
        return ""

    # Dependency labels of modifiers that may precede the marked token and
    # therefore trigger the auxiliary insertion early.
    deps = ("det", "acomp", "advmod", "amod", "neg", "prep", "nummod")

    res = []
    done = False
    forbidden = None
    auxil = doc[1].text
    start_index = 2
    if doc[2].dep_ == "neg":
        auxil += " not"
        start_index = 3
    # pos mirrors the index of the current token within doc.
    pos = start_index

    for token in doc[start_index:]:
        ancestors = list(token.ancestors)
        under_marked = marked in ancestors
        # Ancestors listed before `marked`, i.e. those between the token and it.
        between = ancestors[:ancestors.index(marked)] if under_marked else []
        no_subject_between = all(x.dep_ != "nsubj" for x in between)

        if (under_marked and token.dep_ in deps and not done
                and pos != start_index and no_subject_between
                and forbidden not in ancestors):
            if (marked is not None and marked.tag_ == "VBG"
                    and forbidden is None and marked.i == start_index):
                # A VBG-tagged marked token sitting at the start index: record
                # this modifier instead of inserting, so that tokens beneath
                # it no longer trigger the insertion.
                forbidden = token
            else:
                res.append(auxil)
                done = True
                # Stop matching against marked once the auxiliary is placed.
                marked = None

        # No modifier triggered the insertion: place the auxiliary directly
        # before the marked token, unless it is the very first token copied.
        if marked == token and pos != start_index and not done:
            done = True
            res.append(auxil)

        if token.text != "?":
            res.append(token.text)
        pos += 1

    return " ".join(res) if done else ""
def be_processing(question):
    """Rewrite a question by moving its auxiliary (second token) into place.

    The question is parsed with the module-level ``nlp`` pipeline and
    ``doc[1]`` is treated as the auxiliary. Several candidate tokens are
    tried as the insertion point via ``process_marked``; among the
    candidates that yield a non-empty result, the one with the smallest
    token index wins.

    Args:
        question: The raw question string.

    Returns:
        The rewritten text, or "" when no candidate produced a result or
        the parsed question has fewer than two tokens.
    """
    # Collapse doubled spaces and repair the common "isnt" misspelling before
    # parsing. NOTE(review): the scraped source showed a single-space to
    # single-space replace (a no-op); a double-space collapse is assumed.
    question = question.replace("  ", " ").replace("isnt", "isn't")
    doc = nlp(question)

    # doc[1] is read unconditionally below.
    if len(doc) < 2:
        return ""

    # Four tokens: simply swap the second and third ones.
    if len(doc) == 4:
        return " ".join([doc[2].text, doc[1].text, doc[3].text])

    aux = doc[1]

    # When the second token is an auxiliary with a single ancestor, that
    # ancestor is the preferred insertion point.
    ancestors = list(aux.ancestors)
    if "aux" in aux.dep_ and len(ancestors) == 1:
        res = process_marked(ancestors[0], doc)
        if res:
            return res

    mini = len(doc)
    best_res = ""

    # Otherwise try unambiguous children of the second token, one dependency
    # label at a time, keeping the earliest candidate that works.
    for dep in ("acomp", "attr", "npadvmod", "advmod", "prep", "ccomp"):
        temp = [x for x in aux.children if x.dep_ == dep and x.i > 2]
        if len(temp) == 1:
            res = process_marked(temp[0], doc)
            if temp[0].i < mini and res:
                mini = temp[0].i
                best_res = res

    # Finally consider modifiers that follow the subject: amod first, then
    # acl, and prep only when nothing has been found so far.
    subjects = [x for x in aux.children if x.dep_ == "nsubj"]
    if subjects:
        subject = subjects[0]
        temp = [x for x in subject.children
                if x.dep_ == "amod" and x.i > subject.i]
        if not temp:
            temp = [x for x in subject.children
                    if x.dep_ == "acl" and x.i > subject.i]
        if not temp and not best_res:
            temp = [x for x in subject.children
                    if x.dep_ == "prep" and x.i > subject.i]
        if len(temp) == 1:
            res = process_marked(temp[0], doc)
            if temp[0].i < mini and res:
                mini = temp[0].i
                best_res = res

    # best_res is still "" when no candidate succeeded.
    return best_res
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment