# Aunsiels/be_processing.py

## be_processing.py
def process_marked(marked, doc):
    """Re-linearise ``doc`` with the auxiliary ``doc[1]`` moved next to ``marked``.

    The tokens from ``start_index`` onward are copied out in order (``"?"``
    tokens are dropped).  ``start_index`` is 2, or 3 when ``doc[2]`` has the
    dependency label ``"neg"``; in that case ``" not"`` is appended to the
    auxiliary text.  The auxiliary text is inserted exactly once, at the first
    position (other than the very first copied token) where either

    * the token is ``marked`` itself, or
    * the token has ``marked`` among its ancestors, carries one of the
      modifier-like dependency labels listed below, has no ``nsubj`` token on
      the ancestor path between itself and ``marked``, and is not below the
      ``forbidden`` token.

    ``forbidden`` is set at most once: when ``marked`` is tagged ``VBG`` and
    sits at ``start_index``, the first token that would otherwise trigger the
    insertion is remembered instead, and tokens below it no longer qualify.

    Args:
        marked: Token the auxiliary should be attached to.  Needs ``tag_`` and
            ``i`` and must be comparable with the tokens of ``doc``.
        doc: Indexable, sliceable token sequence whose items expose ``text``,
            ``dep_`` and ``ancestors`` (presumably a spaCy ``Doc``).

    Returns:
        The space-joined token texts with the auxiliary inserted, or ``""``
        when no insertion point was found or ``doc`` has fewer than three
        tokens.
    """
    # doc[1] and doc[2] are read unconditionally below; a shorter input has
    # nothing to reorder, so report "no result" instead of raising IndexError.
    if len(doc) < 3:
        return ""

    # Dependency labels of descendants that may pull the auxiliary in front
    # of them (loop-invariant, so built once).
    movable_deps = ("det", "acomp", "advmod", "amod", "neg", "prep", "nummod")

    auxil = doc[1].text
    start_index = 2
    if doc[2].dep_ == "neg":
        # Fold the negation into the auxiliary and skip the negation token.
        auxil += " not"
        start_index = 3

    res = []
    done = False
    forbidden = None
    for pos, token in enumerate(doc[start_index:], start=start_index):
        # The first copied token never triggers the insertion.
        may_insert = not done and pos != start_index
        ancestors = list(token.ancestors)
        if may_insert and marked in ancestors:
            # Ancestors strictly between this token and ``marked``.
            between = ancestors[:ancestors.index(marked)]
            if (token.dep_ in movable_deps
                    and forbidden not in ancestors
                    and all(a.dep_ != "nsubj" for a in between)):
                if marked.tag_ == "VBG" and forbidden is None and marked.i == start_index:
                    forbidden = token
                else:
                    res.append(auxil)
                    done = True
                    # Cleared so the equality check below cannot fire later.
                    marked = None
        if not done and pos != start_index and marked == token:
            done = True
            res.append(auxil)
        if token.text != "?":
            res.append(token.text)

    return " ".join(res) if done else ""

def be_processing(question):
    """Rewrite ``question`` with its second token moved further into the sentence.

    The text is lightly normalised (double spaces collapsed once, ``"isnt"``
    expanded to ``"isn't"``) and parsed with the module-level ``nlp`` callable
    (defined outside this block; presumably a spaCy pipeline).  ``doc[1]`` is
    treated as the auxiliary -- presumably a form of "be", judging by the
    function name -- and several candidate anchor tokens are tried with
    ``process_marked``:

    1. the single ancestor of the auxiliary, when the auxiliary's dependency
       label contains ``"aux"`` (returned immediately on success);
    2. a unique child of the auxiliary, at index > 2, for each of a fixed list
       of dependency labels;
    3. a unique ``amod`` / ``acl`` / ``prep`` child that follows the
       auxiliary's first ``nsubj`` child (``prep`` is only considered when
       step 2 produced nothing).

    Among the successful candidates of steps 2 and 3 the one with the smallest
    token index wins.

    Args:
        question: Raw question text.

    Returns:
        The rewritten sentence, or ``""`` when no rewrite was found or the
        parse has fewer than three tokens.
    """
    question = question.replace("  ", " ").replace("isnt", "isn't")
    doc = nlp(question)

    # Every path below reads doc[1] (and process_marked reads doc[2]); for
    # shorter parses nothing can be reordered, so return the "no result"
    # value instead of raising IndexError.
    if len(doc) < 3:
        return ""
    if len(doc) == 4:
        # Four-token input: swap tokens 1 and 2 and keep token 3.
        return " ".join([doc[2].text, doc[1].text, doc[3].text])

    aux = doc[1]
    ancestors = list(aux.ancestors)
    if "aux" in aux.dep_ and len(ancestors) == 1:
        res = process_marked(ancestors[0], doc)
        if res:
            return res

    # Smallest anchor index seen so far and the rewrite it produced.
    mini = len(doc)
    best_res = ""

    for dep in ("acomp", "attr", "npadvmod", "advmod", "prep", "ccomp"):
        temp = [x for x in aux.children if x.dep_ == dep and x.i > 2]
        if len(temp) == 1:
            res = process_marked(temp[0], doc)
            if res and temp[0].i < mini:
                mini = temp[0].i
                best_res = res

    temp = []
    subjects = [x for x in aux.children if x.dep_ == "nsubj"]
    if subjects:
        subject = subjects[0]

        def trailing(dep):
            # Children of the subject with label ``dep`` located after it.
            return [x for x in subject.children if x.dep_ == dep and x.i > subject.i]

        temp = trailing("amod") or trailing("acl")
        if not temp and not best_res:
            temp = trailing("prep")

    if len(temp) == 1:
        res = process_marked(temp[0], doc)
        if res and temp[0].i < mini:
            best_res = res

    return best_res
	def process_marked(marked, doc):
	    """Re-linearise ``doc`` with the auxiliary ``doc[1]`` moved next to ``marked``.

	    The tokens from ``start_index`` onward are copied out in order (``"?"``
	    tokens are dropped).  ``start_index`` is 2, or 3 when ``doc[2]`` has the
	    dependency label ``"neg"``; in that case ``" not"`` is appended to the
	    auxiliary text.  The auxiliary text is inserted exactly once, at the
	    first position (other than the very first copied token) where either

	    * the token is ``marked`` itself, or
	    * the token has ``marked`` among its ancestors, carries one of the
	      modifier-like dependency labels listed below, has no ``nsubj`` token
	      on the ancestor path between itself and ``marked``, and is not below
	      the ``forbidden`` token.

	    ``forbidden`` is set at most once: when ``marked`` is tagged ``VBG`` and
	    sits at ``start_index``, the first token that would otherwise trigger
	    the insertion is remembered instead, and tokens below it no longer
	    qualify.

	    Args:
	        marked: Token the auxiliary should be attached to.  Needs ``tag_``
	            and ``i`` and must be comparable with the tokens of ``doc``.
	        doc: Indexable, sliceable token sequence whose items expose
	            ``text``, ``dep_`` and ``ancestors`` (presumably a spaCy
	            ``Doc``).

	    Returns:
	        The space-joined token texts with the auxiliary inserted, or ``""``
	        when no insertion point was found or ``doc`` has fewer than three
	        tokens.
	    """
	    # doc[1] and doc[2] are read unconditionally below; a shorter input has
	    # nothing to reorder, so report "no result" instead of raising IndexError.
	    if len(doc) < 3:
	        return ""
	    # Dependency labels of descendants that may pull the auxiliary in front
	    # of them (loop-invariant, so built once).
	    movable_deps = ("det", "acomp", "advmod", "amod", "neg", "prep", "nummod")
	    auxil = doc[1].text
	    start_index = 2
	    if doc[2].dep_ == "neg":
	        # Fold the negation into the auxiliary and skip the negation token.
	        auxil += " not"
	        start_index = 3
	    res = []
	    done = False
	    forbidden = None
	    for pos, token in enumerate(doc[start_index:], start=start_index):
	        # The first copied token never triggers the insertion.
	        may_insert = not done and pos != start_index
	        ancestors = list(token.ancestors)
	        if may_insert and marked in ancestors:
	            # Ancestors strictly between this token and ``marked``.
	            between = ancestors[:ancestors.index(marked)]
	            if (token.dep_ in movable_deps
	                    and forbidden not in ancestors
	                    and all(a.dep_ != "nsubj" for a in between)):
	                if marked.tag_ == "VBG" and forbidden is None and marked.i == start_index:
	                    forbidden = token
	                else:
	                    res.append(auxil)
	                    done = True
	                    # Cleared so the equality check below cannot fire later.
	                    marked = None
	        if not done and pos != start_index and marked == token:
	            done = True
	            res.append(auxil)
	        if token.text != "?":
	            res.append(token.text)
	    return " ".join(res) if done else ""

	def be_processing(question):
	    """Rewrite ``question`` with its second token moved further into the sentence.

	    The text is lightly normalised (double spaces collapsed once, ``"isnt"``
	    expanded to ``"isn't"``) and parsed with the ``nlp`` callable (defined
	    outside this block; presumably a spaCy pipeline).  ``doc[1]`` is treated
	    as the auxiliary -- presumably a form of "be", judging by the function
	    name -- and several candidate anchor tokens are tried with
	    ``process_marked``:

	    1. the single ancestor of the auxiliary, when the auxiliary's dependency
	       label contains ``"aux"`` (returned immediately on success);
	    2. a unique child of the auxiliary, at index > 2, for each of a fixed
	       list of dependency labels;
	    3. a unique ``amod`` / ``acl`` / ``prep`` child that follows the
	       auxiliary's first ``nsubj`` child (``prep`` is only considered when
	       step 2 produced nothing).

	    Among the successful candidates of steps 2 and 3 the one with the
	    smallest token index wins.

	    Args:
	        question: Raw question text.

	    Returns:
	        The rewritten sentence, or ``""`` when no rewrite was found or the
	        parse has fewer than three tokens.
	    """
	    # Collapse a doubled space into one (the pattern must be two spaces;
	    # replacing a single space with itself would be a no-op).
	    question = question.replace("  ", " ").replace("isnt", "isn't")
	    doc = nlp(question)
	    # Every path below reads doc[1] (and process_marked reads doc[2]); for
	    # shorter parses nothing can be reordered, so return the "no result"
	    # value instead of raising IndexError.
	    if len(doc) < 3:
	        return ""
	    if len(doc) == 4:
	        # Four-token input: swap tokens 1 and 2 and keep token 3.
	        return " ".join([doc[2].text, doc[1].text, doc[3].text])
	    aux = doc[1]
	    ancestors = list(aux.ancestors)
	    if "aux" in aux.dep_ and len(ancestors) == 1:
	        res = process_marked(ancestors[0], doc)
	        if res:
	            return res
	    # Smallest anchor index seen so far and the rewrite it produced.
	    mini = len(doc)
	    best_res = ""
	    for dep in ("acomp", "attr", "npadvmod", "advmod", "prep", "ccomp"):
	        temp = [x for x in aux.children if x.dep_ == dep and x.i > 2]
	        if len(temp) == 1:
	            res = process_marked(temp[0], doc)
	            if res and temp[0].i < mini:
	                mini = temp[0].i
	                best_res = res
	    temp = []
	    subjects = [x for x in aux.children if x.dep_ == "nsubj"]
	    if subjects:
	        subject = subjects[0]
	        def trailing(dep):
	            # Children of the subject with label ``dep`` located after it.
	            return [x for x in subject.children if x.dep_ == dep and x.i > subject.i]
	        temp = trailing("amod") or trailing("acl")
	        if not temp and not best_res:
	            temp = trailing("prep")
	    if len(temp) == 1:
	        res = process_marked(temp[0], doc)
	        if res and temp[0].i < mini:
	            best_res = res
	    return best_res