Skip to content

Instantly share code, notes, and snippets.

@fhardison
Created October 12, 2023 13:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fhardison/b173401017ae53b21e6ffbc461581e82 to your computer and use it in GitHub Desktop.
Save fhardison/b173401017ae53b21e6ffbc461581e82 to your computer and use it in GitHub Desktop.
Simple script to display lowfat syntax trees from macula-greek on the command line
import xml.etree.ElementTree as ET
from difflib import get_close_matches
from pathlib import Path
import sys
# Directory holding the lowfat XML files, relative to the working directory.
DIR = Path('macula-greek') / 'SBLGNT' / 'lowfat'

# (book code, file-name stem) pairs in file order; the position in this
# tuple supplies the two-digit numeric prefix of each file name.
_BOOKS = (
    ('MAT', 'matthew'),
    ('MRK', 'mark'),
    ('LUK', 'luke'),
    ('JHN', 'john'),
    ('ACT', 'acts'),
    ('ROM', 'romans'),
    ('1CO', '1corinthians'),
    ('2CO', '2corinthians'),
    ('GAL', 'galatians'),
    ('EPH', 'ephesians'),
    ('PHP', 'philippians'),
    ('COL', 'colossians'),
    ('1TH', '1thessalonians'),
    ('2TH', '2thessalonians'),
    ('1TI', '1timothy'),
    ('2TI', '2timothy'),
    ('TIT', 'titus'),
    ('PHM', 'philemon'),
    ('HEB', 'hebrews'),
    ('JAS', 'james'),
    ('1PE', '1peter'),
    ('2PE', '2peter'),
    ('1JN', '1john'),
    ('2JN', '2john'),
    ('3JN', '3john'),
    ('JUD', 'jude'),
    ('REV', 'revelation'),
)

# Book code -> XML file name inside DIR, e.g. 'MAT' -> '01-matthew.xml'.
FILES = {
    code: f'{number:02d}-{stem}.xml'
    for number, (code, stem) in enumerate(_BOOKS, start=1)
}
# --- Command-line handling and XML loading ---------------------------------
# argv[1] is the reference to look up; it is later compared verbatim against
# milestone ids, and the text before its first space selects the book file.
# NOTE(review): presumably of the form "MAT 1:1" - confirm against the data.
# argv[2] (optional) is the string used for one level of indentation.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of an IndexError traceback.
    print(f"usage: {sys.argv[0]} REFERENCE [SEPARATOR]", file=sys.stderr)
    sys.exit(2)

target = sys.argv[1]
xml_file = target.split(' ')[0]
SEP = sys.argv[2] if len(sys.argv) > 2 else '\t'

if xml_file not in FILES:
    print(f"{target} not found in files")
    suggestions = get_close_matches(xml_file, FILES)
    # Only offer suggestions when there actually are some.
    if suggestions:
        print(f"did you mean {', '.join(suggestions)}")
    # sys.exit is always available (the bare exit() helper comes from the
    # optional site module); a non-zero status signals the failure.
    sys.exit(1)

tree = ET.parse(str(DIR / FILES[xml_file]))
root = tree.getroot()
def handle_wg(xs, level, sep, last_level_was_blank):
    """Render ``wg``/``w`` elements as fragments of an indented outline.

    Args:
        xs: Iterable of XML elements. Only elements tagged ``wg`` or ``w``
            produce output; any other tag is skipped.
        level: Current indentation depth. Lines that start a new row are
            prefixed with ``sep * level`` (a negative level gives no indent).
        sep: String repeated once per indentation level.
        last_level_was_blank: When true, the children of each ``wg`` in
            ``xs`` are rendered at ``level`` instead of ``level + 1``.

    Returns:
        A list of string fragments meant to be joined with single spaces.
        Groups with a role, conjunctions, and words with a role begin with a
        newline followed by their indentation; other words are bare text.
    """
    buffer = []
    for x in xs:
        if x.tag == 'wg':
            # True when this group has no direct <w> children; passed down
            # so that the recursive call does not indent a further level.
            no_direct_words = x.find('./w') is None
            head = f"{x.get('role', '')} ".upper()
            child_level = level + 1 + (-1 if last_level_was_blank else 0)
            rendered = ' '.join(
                handle_wg(list(x), child_level, sep, no_direct_words)
            )
            if head.strip():
                buffer.append('\n' + sep * level + head + rendered)
            else:
                # Groups without a role add no line of their own.
                buffer.append(rendered)
        elif x.tag == 'w':
            role = x.get('role', '').upper()
            # An empty <w/> has text None; treat it as an empty word rather
            # than raising TypeError on concatenation.
            word = (x.text or '') + x.get('after', '').strip()
            is_conj = x.get('class', '').strip().lower() == 'conj'
            if is_conj or role.strip():
                buffer.append('\n' + sep * level + (role + ' ' + word).strip())
            else:
                buffer.append(word)
            if is_conj:
                # Siblings that follow a conjunction are outdented one level.
                level -= 1
    return buffer
# Print the outline of every sentence that contains a milestone whose id
# equals the requested reference.
found = False
for elem in root.findall('.//sentence'):
    # A milestone without an id attribute is treated as an empty id instead
    # of crashing on None.strip().
    milestones = [
        (e.get('id') or '').strip() for e in elem.findall('.//milestone')
    ]
    if target not in milestones:
        continue
    found = True
    for e in elem.findall('./wg'):
        # Start at level -1 so the children of the top-level group are not
        # indented (a negative repeat count yields an empty prefix).
        print(' '.join(handle_wg(list(e), -1, SEP, False)).strip())

if not found:
    # Report on stderr so stdout stays empty when nothing matched.
    print(f"no sentence found for {target}", file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment