# fhardison/gr_tree.py

## gr_tree.py
import xml.etree.ElementTree as ET
from difflib import get_close_matches
from pathlib import Path
import sys


# Directory that holds the "lowfat" XML files. The path is relative, so it is
# resolved against the current working directory.
DIR = Path("macula-greek/SBLGNT/lowfat/")

# Maps a three-character book code to the file name of that book's XML file.
FILES = {
    "MAT": "01-matthew.xml",
    "MRK": "02-mark.xml",
    "LUK": "03-luke.xml",
    "JHN": "04-john.xml",
    "ACT": "05-acts.xml",
    "ROM": "06-romans.xml",
    "1CO": "07-1corinthians.xml",
    "2CO": "08-2corinthians.xml",
    "GAL": "09-galatians.xml",
    "EPH": "10-ephesians.xml",
    "PHP": "11-philippians.xml",
    "COL": "12-colossians.xml",
    "1TH": "13-1thessalonians.xml",
    "2TH": "14-2thessalonians.xml",
    "1TI": "15-1timothy.xml",
    "2TI": "16-2timothy.xml",
    "TIT": "17-titus.xml",
    "PHM": "18-philemon.xml",
    "HEB": "19-hebrews.xml",
    "JAS": "20-james.xml",
    "1PE": "21-1peter.xml",
    "2PE": "22-2peter.xml",
    "1JN": "23-1john.xml",
    "2JN": "24-2john.xml",
    "3JN": "25-3john.xml",
    "JUD": "26-jude.xml",
    "REV": "27-revelation.xml",
}

# --- Command-line handling and XML loading ---------------------------------
# argv[1]: the reference to look up. Its first space-separated word must be a
#          key of FILES; the whole string is later compared with milestone ids
#          (presumably something like "JHN 1:1" -- confirm against the XML).
# argv[2]: optional indentation unit, defaults to a single tab.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.exit(f"usage: {sys.argv[0]} '<BOOK> <reference>' [separator]")

target = sys.argv[1]

# The book code is everything before the first space of the reference.
xml_file = target.split(' ')[0]

SEP = sys.argv[2] if len(sys.argv) > 2 else '\t'

if xml_file not in FILES:
    print(f"{target} not found in files")
    print(f"did you mean {', '.join(get_close_matches(xml_file, FILES))}")
    # sys.exit (not the site-provided exit()) with a non-zero status so that
    # callers and shells can detect the failure.
    sys.exit(1)

tree = ET.parse(str(DIR / FILES[xml_file]))
root = tree.getroot()


def handle_wg(xs, level, sep, last_level_was_blank):
    """Render a sequence of tree elements as indented text fragments.

    ``wg`` elements are rendered recursively from their children; ``w``
    elements contribute their text followed by their stripped ``after``
    attribute. Elements with any other tag are ignored.

    Args:
        xs: Iterable of ElementTree-style elements to render, in order.
        level: Indentation depth used for fragments that start a new line.
            A zero or negative depth produces no indentation.
        sep: String repeated ``level`` times to build the indentation.
        last_level_was_blank: True when the enclosing ``wg`` had no direct
            ``w`` children; nested groups are then rendered one level
            shallower than they otherwise would be.

    Returns:
        A list of string fragments. Fragments that open a new line start with
        a newline character; the caller is expected to join the list.
    """
    # Depends only on the argument, so compute it once instead of per element.
    wg_type_mod = -1 if last_level_was_blank else 0
    buffer = []
    for x in xs:
        if x.tag == 'wg':
            # True when this group holds no words directly (only sub-groups).
            no_direct_words = x.find('./w') is None
            head = f"{x.get('role', '')} ".upper()
            inner = ' '.join(
                handle_wg(list(x), level + 1 + wg_type_mod, sep, no_direct_words)
            )
            if head.strip():
                # A group with a role opens a new, labelled line.
                buffer.append('\n' + sep * level + head + inner)
            else:
                # A group without a role is transparent: splice its content in.
                buffer.append(inner)
        elif x.tag == 'w':
            role = x.get('role', '').upper()
            # An empty <w/> has text None; treat it as an empty string rather
            # than failing on the concatenation.
            word = (x.text or '') + x.get('after', '').strip()
            if x.get('class', '').strip().lower() == 'conj':
                buffer.append('\n' + sep * level + (role + ' ' + word).strip())
                # Everything following a conjunction at this nesting level is
                # rendered one level shallower.
                level -= 1
            elif role.strip():
                buffer.append('\n' + sep * level + (role + ' ' + word).strip())
            else:
                buffer.append(word)
    return buffer


# Print the rendered tree of every sentence that contains the requested
# milestone id.
for elem in root.findall('.//sentence'):
    # A milestone without an id attribute is treated as an empty id instead of
    # raising AttributeError.
    milestones = [(e.get('id') or '').strip() for e in elem.findall('.//milestone')]
    if target not in milestones:
        continue
    # Each top-level word group is rendered separately. Rendering starts at
    # level -1, and non-positive levels produce no indentation.
    for e in elem.findall('./wg'):
        print(' '.join(handle_wg(list(e), -1, SEP, False)).strip())
	import xml.etree.ElementTree as ET
	from difflib import get_close_matches
	from pathlib import Path
	import sys


	# Directory that holds the "lowfat" XML files. The path is relative, so it
	# is resolved against the current working directory.
	DIR = Path("macula-greek/SBLGNT/lowfat/")

	# Maps a three-character book code to the file name of that book's XML file.
	FILES = {
		"MAT": "01-matthew.xml",
		"MRK": "02-mark.xml",
		"LUK": "03-luke.xml",
		"JHN": "04-john.xml",
		"ACT": "05-acts.xml",
		"ROM": "06-romans.xml",
		"1CO": "07-1corinthians.xml",
		"2CO": "08-2corinthians.xml",
		"GAL": "09-galatians.xml",
		"EPH": "10-ephesians.xml",
		"PHP": "11-philippians.xml",
		"COL": "12-colossians.xml",
		"1TH": "13-1thessalonians.xml",
		"2TH": "14-2thessalonians.xml",
		"1TI": "15-1timothy.xml",
		"2TI": "16-2timothy.xml",
		"TIT": "17-titus.xml",
		"PHM": "18-philemon.xml",
		"HEB": "19-hebrews.xml",
		"JAS": "20-james.xml",
		"1PE": "21-1peter.xml",
		"2PE": "22-2peter.xml",
		"1JN": "23-1john.xml",
		"2JN": "24-2john.xml",
		"3JN": "25-3john.xml",
		"JUD": "26-jude.xml",
		"REV": "27-revelation.xml",
	}

	# --- Command-line handling and XML loading ------------------------------
	# argv[1]: the reference to look up. Its first space-separated word must be
	#          a key of FILES; the whole string is later compared with
	#          milestone ids (presumably something like "JHN 1:1" -- confirm
	#          against the XML).
	# argv[2]: optional indentation unit, defaults to a single tab.
	if len(sys.argv) < 2:
		# Fail with a usage hint instead of a bare IndexError traceback.
		sys.exit(f"usage: {sys.argv[0]} '<BOOK> <reference>' [separator]")

	target = sys.argv[1]

	# The book code is everything before the first space of the reference.
	xml_file = target.split(' ')[0]

	SEP = sys.argv[2] if len(sys.argv) > 2 else '\t'

	if xml_file not in FILES:
		print(f"{target} not found in files")
		print(f"did you mean {', '.join(get_close_matches(xml_file, FILES))}")
		# sys.exit (not the site-provided exit()) with a non-zero status so
		# that callers and shells can detect the failure.
		sys.exit(1)

	tree = ET.parse(str(DIR / FILES[xml_file]))
	root = tree.getroot()


	def handle_wg(xs, level, sep, last_level_was_blank):
		"""Render a sequence of tree elements as indented text fragments.

		``wg`` elements are rendered recursively from their children; ``w``
		elements contribute their text followed by their stripped ``after``
		attribute. Elements with any other tag are ignored.

		Args:
			xs: Iterable of ElementTree-style elements to render, in order.
			level: Indentation depth used for fragments that start a new
				line. A zero or negative depth produces no indentation.
			sep: String repeated ``level`` times to build the indentation.
			last_level_was_blank: True when the enclosing ``wg`` had no
				direct ``w`` children; nested groups are then rendered one
				level shallower than they otherwise would be.

		Returns:
			A list of string fragments. Fragments that open a new line start
			with a newline character; the caller is expected to join the list.
		"""
		# Depends only on the argument, so compute it once, not per element.
		wg_type_mod = -1 if last_level_was_blank else 0
		buffer = []
		for x in xs:
			if x.tag == 'wg':
				# True when this group holds no words directly.
				no_direct_words = x.find('./w') is None
				head = f"{x.get('role', '')} ".upper()
				inner = ' '.join(
					handle_wg(list(x), level + 1 + wg_type_mod, sep, no_direct_words)
				)
				if head.strip():
					# A group with a role opens a new, labelled line.
					buffer.append('\n' + sep * level + head + inner)
				else:
					# A group without a role is transparent: splice it in.
					buffer.append(inner)
			elif x.tag == 'w':
				role = x.get('role', '').upper()
				# An empty <w/> has text None; treat it as an empty string
				# rather than failing on the concatenation.
				word = (x.text or '') + x.get('after', '').strip()
				if x.get('class', '').strip().lower() == 'conj':
					buffer.append('\n' + sep * level + (role + ' ' + word).strip())
					# Everything following a conjunction at this nesting
					# level is rendered one level shallower.
					level -= 1
				elif role.strip():
					buffer.append('\n' + sep * level + (role + ' ' + word).strip())
				else:
					buffer.append(word)
		return buffer



	# Print the rendered tree of every sentence that contains the requested
	# milestone id.
	for elem in root.findall('.//sentence'):
		# A milestone without an id attribute is treated as an empty id
		# instead of raising AttributeError.
		milestones = [(e.get('id') or '').strip() for e in elem.findall('.//milestone')]
		if target not in milestones:
			continue
		# Each top-level word group is rendered separately. Rendering starts
		# at level -1, and non-positive levels produce no indentation.
		for e in elem.findall('./wg'):
			print(' '.join(handle_wg(list(e), -1, SEP, False)).strip())