tabVersion/re_because.py

## re_because.py
# Requirements: use regular expressions to retrieve the sentences that contain
# specific strings in a text, and align the output.
# Find the sentences that contain "because...so" ("因为...所以") in the text and
# align the output on those two words. Everything between "because" and "so" is
# output in full, together with 2 or more characters before and after the pair.
# If a sentence contains more than one "because", output each of them separately.
# Note that each "because" may be used only once.

import re

def find(string: str) -> list:
    """Return every "因为...所以" fragment found in ``string``.

    A fragment consists of up to two characters of leading context, "因为",
    the text up to a following "所以", and exactly two characters of trailing
    context.  The text between "因为" and "所以" of every top-level match is
    searched again recursively, so nested pairs are reported as fragments of
    their own.

    The text is scanned twice, once with a greedy and once with a non-greedy
    pattern.  The greedy result is returned only when it contains strictly
    more fragments; otherwise the non-greedy result is returned.

    Args:
        string: The text to search.

    Returns:
        A list of fragments: the top-level matches in order of appearance,
        followed by the fragments found inside each of them.  The list is
        empty when nothing matches.
    """
    greedy = re.compile(r'.?.?因为.*所以..')
    non_greedy = re.compile(r'.?.?因为.*?所以..')

    def _collect(pattern):
        top_level = pattern.findall(string)
        fragments = list(top_level)
        # Walk the top-level matches only.  Fragments returned by the
        # recursive call are already fully expanded; iterating over them
        # again (as happens when the list is extended while being looped
        # over) would append their nested fragments a second time.
        for item in top_level:
            # Every match ends with "所以" plus two characters, hence -4.
            inner = item[item.find('因为') + 2:-4]
            # find() returns an empty list when ``inner`` holds no pair.
            fragments.extend(find(inner))
        return fragments

    greedy_res = _collect(greedy)
    non_greedy_res = _collect(non_greedy)
    if len(greedy_res) > len(non_greedy_res):
        return greedy_res
    return non_greedy_res

def format_match(line_no: int, item: str) -> str:
    """Format one fragment returned by ``find`` as a tab-separated row.

    The columns are: the line number, the text that precedes "因为" inside
    the fragment, the marker ``*因为*``, the text between "因为" and "所以",
    the marker ``&所以&``, and the two characters that follow "所以".

    Args:
        line_no: Line number to print in the first column.
        item: A fragment that contains "因为" and ends with "所以" followed
            by two characters.

    Returns:
        The formatted row, without a trailing newline.
    """
    pos = item.find('因为')
    # The fragment may carry zero, one or two characters before "因为", so
    # slice up to ``pos``.  A fixed ``item[:2]`` would pull "因" into the
    # prefix column when only one character precedes the keyword.
    return f'{line_no}\t{item[:pos]}\t*因为*\t{item[pos + 2: -4]}\t&所以&\t{item[-2:]}'


if __name__ == "__main__":
    # NOTE(review): the corpus is opened with the platform default encoding;
    # confirm that it matches the encoding of the file.
    with open('corpus(1).txt') as f:
        # Line numbers in the output are 1-based.
        for line_no, line in enumerate(f, start=1):
            for item in find(line):
                print(format_match(line_no, item))
	# Requirements: use regular expressions to retrieve the sentences that contain
	# specific strings in a text, and align the output.
	# Find the sentences that contain "because...so" ("因为...所以") in the text and
	# align the output on those two words. Everything between "because" and "so" is
	# output in full, together with 2 or more characters before and after the pair.
	# If a sentence contains more than one "because", output each of them separately.
	# Note that each "because" may be used only once.

	import re

	def find(string: str) -> list:
		"""Return every "因为...所以" fragment found in ``string``.

		A fragment consists of up to two characters of leading context, "因为",
		the text up to a following "所以", and exactly two characters of trailing
		context.  The text between "因为" and "所以" of every top-level match is
		searched again recursively, so nested pairs are reported as fragments of
		their own.

		The text is scanned twice, once with a greedy and once with a non-greedy
		pattern.  The greedy result is returned only when it contains strictly
		more fragments; otherwise the non-greedy result is returned.

		Args:
			string: The text to search.

		Returns:
			A list of fragments: the top-level matches in order of appearance,
			followed by the fragments found inside each of them.  The list is
			empty when nothing matches.
		"""
		greedy = re.compile(r'.?.?因为.*所以..')
		non_greedy = re.compile(r'.?.?因为.*?所以..')

		def _collect(pattern):
			top_level = pattern.findall(string)
			fragments = list(top_level)
			# Walk the top-level matches only.  Fragments returned by the
			# recursive call are already fully expanded; iterating over them
			# again (as happens when the list is extended while being looped
			# over) would append their nested fragments a second time.
			for item in top_level:
				# Every match ends with "所以" plus two characters, hence -4.
				inner = item[item.find('因为') + 2:-4]
				# find() returns an empty list when ``inner`` holds no pair.
				fragments.extend(find(inner))
			return fragments

		greedy_res = _collect(greedy)
		non_greedy_res = _collect(non_greedy)
		if len(greedy_res) > len(non_greedy_res):
			return greedy_res
		return non_greedy_res

	def format_match(line_no: int, item: str) -> str:
		"""Format one fragment returned by ``find`` as a tab-separated row.

		The columns are: the line number, the text that precedes "因为" inside
		the fragment, the keyword "因为", the text between "因为" and "所以",
		the marker ``&所以&``, and the two characters that follow "所以".

		Args:
			line_no: Line number to print in the first column.
			item: A fragment that contains "因为" and ends with "所以" followed
				by two characters.

		Returns:
			The formatted row, without a trailing newline.
		"""
		pos = item.find('因为')
		# The fragment may carry zero, one or two characters before "因为", so
		# slice up to ``pos``.  A fixed ``item[:2]`` would pull "因" into the
		# prefix column when only one character precedes the keyword.
		# NOTE(review): the space-indented copy of this script prints "*因为*"
		# in the third column; confirm which marker is intended.
		return f'{line_no}\t{item[:pos]}\t因为\t{item[pos + 2: -4]}\t&所以&\t{item[-2:]}'


	if __name__ == "__main__":
		# NOTE(review): the corpus is opened with the platform default encoding;
		# confirm that it matches the encoding of the file.
		with open('corpus(1).txt') as f:
			# Line numbers in the output are 1-based.
			for line_no, line in enumerate(f, start=1):
				for item in find(line):
					print(format_match(line_no, item))