Skip to content

Instantly share code, notes, and snippets.

@JJTech0130
Last active June 9, 2023 20:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JJTech0130/b600c9f1e85a58558fe2c576fdac39b8 to your computer and use it in GitHub Desktop.
Save JJTech0130/b600c9f1e85a58558fe2c576fdac39b8 to your computer and use it in GitHub Desktop.
Parse Apple's ITML into proper HTML
from lxml import etree
import requests
# Stylesheet that convert_itml_document() appends after the CSS generated from
# the included font styles. The class selectors match the ``class`` attribute
# convert_itml_to_html() assigns to generic "View" elements (their original
# ITML tag name).
OVERRIDE_CSS = """
/* Custom injected CSS */
.VBoxView {
display: flex;
flex-direction: column;
}
.HBoxView {
display: flex;
flex-direction: row;
}
.MatrixView {
display: grid;
}
a {
text-decoration: none;
}
"""
def convert_fontstyle_to_css(fontstyle: str):
    """Convert a fragment of ITML font-style definitions into CSS rules.

    ``fontstyle`` is a run of sibling ``<FontStyle>`` / ``<FontStyleSet>``
    elements with no single root, so it is wrapped in a synthetic ``<root>``
    element before being parsed.

    Every ``FontStyle`` becomes one rule with the selector ``.<styleName>``.
    A ``FontStyleSet`` contributes extra selectors to the styles it
    references: ``.<setName>`` for ``normalStyle``, ``.<setName> a`` for
    ``linkStyle``, ``.<setName> a:active`` for ``linkPressedStyle`` and
    ``.<setName> a:hover`` for ``linkRolloverStyle``.

    Args:
        fontstyle: The XML fragment as text.

    Returns:
        The generated CSS (an empty string when there is no ``FontStyle``).

    Raises:
        Whatever ``etree.fromstring`` raises when the fragment is not
        well-formed XML.
    """
    tree = etree.fromstring("<root>" + fontstyle + "</root>")

    # (FontStyleSet attribute, suffix added to ".<setName>")
    set_roles = (
        ("normalStyle", ""),
        ("linkStyle", " a"),
        ("linkPressedStyle", " a:active"),
        ("linkRolloverStyle", " a:hover"),
    )
    known_faces = ("bold", "underline")

    # FontStyles can only be emitted once every FontStyleSet has been seen,
    # because a set may add selectors to a style that appears before it.
    font_styles = []
    extra_selectors = {}  # styleName -> additional selectors for its rule
    for element in tree:
        if element.tag == "FontStyle":
            font_styles.append(element)
        elif element.tag == "FontStyleSet":
            set_name = element.attrib.get("setName")
            if set_name is None:
                print("Warning: Ignoring FontStyleSet without setName")
                continue
            for attr, suffix in set_roles:
                # A set does not have to define all four roles.
                target = element.attrib.get(attr)
                if target is not None:
                    extra_selectors.setdefault(target, []).append(
                        f".{set_name}{suffix}"
                    )

    rules = []
    for element in font_styles:
        attrib = element.attrib
        style_name = attrib.get("styleName")
        if style_name is None:
            print("Warning: Ignoring FontStyle without styleName")
            continue

        selectors = [f".{style_name}", *extra_selectors.get(style_name, [])]
        lines = [f"{', '.join(selectors)} {{"]

        if "font" in attrib:
            families = []
            for family in attrib["font"].split(","):
                # Strip first: "A, B" must not turn into the family "' B'".
                family = family.strip()
                if not family:
                    continue
                # Family names containing spaces have to be quoted in CSS.
                families.append(f"'{family}'" if " " in family else family)
                if family == "SF UI":
                    # SF UI font is probably not available; add fallbacks.
                    families.extend(
                        ("-apple-system", "BlinkMacSystemFont", "sans-serif")
                    )
            if families:
                lines.append(f" font-family: {', '.join(families)};")
        if "size" in attrib:
            lines.append(f" font-size: {attrib['size']}px;")
        if "color" in attrib:
            lines.append(f" color: #{attrib['color']};")
        if "line-height" in attrib:
            lines.append(f" line-height: {attrib['line-height']}px;")
        if "face" in attrib:
            faces = [face.strip() for face in attrib["face"].split(",")]
            if "bold" in faces:
                lines.append(" font-weight: bold;")
            if "underline" in faces:
                lines.append(" text-decoration: underline;")
            # Report every face that was dropped, even next to a known one.
            unknown = [face for face in faces if face and face not in known_faces]
            if unknown:
                print(f"Warning: Unknown face {','.join(unknown)}")

        lines.append("}")
        rules.append("\n".join(lines) + "\n")
    return "".join(rules)
# TODO: Make this not a global variable
# URLs taken from <Include> elements: convert_itml_to_html() appends to this
# list and convert_itml_document() hands it to fixup_includes().
global_includes = []
# (ITML attribute, CSS property, value prefix, value suffix), listed in the
# order the attributes are translated. Each declaration is *prepended* to the
# inline style, so later entries end up earlier in the final ``style`` value.
_ITML_STYLE_ATTRS = (
    ("bottomInset", "margin-bottom", "", "px"),
    ("topInset", "margin-top", "", "px"),
    ("leftInset", "margin-left", "", "px"),
    ("rightInset", "margin-right", "", "px"),
    ("width", "width", "", "px"),
    ("height", "height", "", "px"),
    ("minWidth", "min-width", "", "px"),
    ("stretchiness", "flex-grow", "", ""),
    ("textJust", "text-align", "", ""),
    ("borderColor", "border-color", "#", ""),
    ("borderWidth", "border-width", "", "px"),
)
# A border only shows with a border-style, so these attributes also add one.
_ITML_BORDER_ATTRS = ("borderColor", "borderWidth")


# Convert ITML elements to HTML
def convert_itml_to_html(element: "etree._Element"):
    """Rewrite an ITML element and its descendants into HTML, in place.

    Tag mapping: ``Document`` -> ``body``, ``TextView`` -> ``p``,
    ``PictureView`` -> ``img`` (``url`` becomes ``src``), any other tag
    containing ``View`` -> ``div`` with the original tag name as its class,
    ``SetFontStyle`` -> ``span``, ``OpenURL`` -> ``a`` (``url`` becomes
    ``href``, ``target`` is dropped); ``b`` is kept. ``styleSet`` and
    ``normalStyle`` are merged into ``class``, layout attributes become
    inline styles and ``viewName`` becomes ``id``.

    ``Include`` elements are not converted: their ``url`` is appended to the
    module-level ``global_includes`` list and the element is dropped.

    Args:
        element: The element to convert; it is modified in place.

    Returns:
        ``True`` when the element was converted and should be kept, ``None``
        when the caller should remove it (includes, unknown tags, comments
        and processing instructions).
    """
    tag = element.tag
    if not isinstance(tag, str):
        # Comments and processing instructions carry a callable, not a string,
        # as their tag; they have no HTML counterpart here.
        return None
    if "}" in tag:
        # Remove the namespace from the tag
        tag = tag.rsplit("}", 1)[-1]
        element.tag = tag

    attrib = element.attrib
    if tag == "Document":
        element.tag = "body"
    elif tag == "Include":
        url = attrib.get("url")
        if url is None:
            print("Warning: Ignoring Include without url")
        else:
            global_includes.append(url)
        return None
    elif tag == "TextView":
        element.tag = "p"
    elif tag == "PictureView":
        element.tag = "img"
        if "url" in attrib:
            # Move (not copy) the URL so no non-HTML attribute is left behind,
            # the same way OpenURL is handled.
            attrib["src"] = attrib.pop("url")
    # Other "Views" are converted into divs with appropriate styling
    elif "View" in tag:
        attrib["class"] = tag
        element.tag = "div"
    elif tag == "b":
        pass
    elif tag == "SetFontStyle":
        element.tag = "span"
    elif tag == "OpenURL":
        element.tag = "a"
        if "url" in attrib:
            attrib["href"] = attrib.pop("url")
        attrib.pop("target", None)
    else:
        print(f"Warning: Ignoring element {tag}")
        return None

    # Fixup styling: both attributes name CSS classes.
    for source in ("styleSet", "normalStyle"):
        if source in attrib:
            class_name = attrib.pop(source)
            if "class" in attrib:
                attrib["class"] += " " + class_name
            else:
                attrib["class"] = class_name

    # Fixup other attributes into inline styles
    for name, css_property, prefix, suffix in _ITML_STYLE_ATTRS:
        if name not in attrib:
            continue
        value = attrib.pop(name)
        style = f"{css_property}: {prefix}{value}{suffix};" + attrib.get("style", "")
        if name in _ITML_BORDER_ATTRS and "border-style" not in style:
            style += "border-style: solid;"
        attrib["style"] = style

    if "viewName" in attrib:
        attrib["id"] = attrib.pop("viewName")

    # Iterate over a snapshot because children are removed along the way.
    previous = None  # last child that was kept
    for child in list(element):
        if convert_itml_to_html(child) is None:
            # Removing a child also removes the text that follows it (its
            # tail); re-attach real text so document content is not lost.
            # Whitespace-only tails are dropped, as before.
            tail = child.tail
            element.remove(child)
            if tail and tail.strip():
                if previous is None:
                    element.text = (element.text or "") + tail
                else:
                    previous.tail = (previous.tail or "") + tail
        else:
            previous = child

    # Only void elements may be self-closing in HTML. Checked after the
    # children were processed, since dropping them can leave the element empty.
    if element.tag != "img" and len(element) == 0 and element.text is None:
        element.text = ""
    return True
def fixup_includes(includes, timeout=30):
    """Download the included font-style documents and convert them to CSS.

    Args:
        includes: Iterable of URLs to fetch. Duplicates are fetched once, in
            first-seen order.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with one CSS string per successfully fetched URL, each
        starting with a CSS comment that names the URL. URLs that cannot be
        fetched are reported with a warning and skipped.
    """
    style_elems = []
    # dict.fromkeys() removes duplicate URLs while keeping their order.
    for include in dict.fromkeys(includes):
        print(f"Fetching {include} to convert to CSS...")
        try:
            # Without a timeout a stalled server would block forever.
            r = requests.get(include, timeout=timeout)
            # An HTTP error page is not a font-style document; don't parse it.
            r.raise_for_status()
        except requests.RequestException as exc:
            print(f"Warning: Could not fetch {include}: {exc}")
            continue
        # Feed to convert css
        css = convert_fontstyle_to_css(r.text)
        style_elems.append("\n/* " + include + " */\n" + css)
    return style_elems
# Hacky helper: re-indents a multi-line CSS string so that it lines up nicely
# once lxml serializes it as the text of a <style> element.
def indent(css, level=0, dent=" "):
    """Return ``css`` with every line indented ``level`` times by ``dent``.

    Surrounding whitespace is stripped first. The result starts with a
    newline and ends with a newline followed by ``level - 1`` indents, so the
    closing tag that follows the text sits one level further out.
    """
    line_prefix = dent * level
    closing_prefix = dent * (level - 1)
    body = "".join(
        "\n" + line_prefix + line for line in css.strip().split("\n")
    )
    return body + "\n" + closing_prefix
def convert_itml_document(tree) -> str:
    """Convert an ITML document to an HTML document string.

    The ITML elements are rewritten in place by ``convert_itml_to_html``, the
    font styles referenced by ``<Include>`` elements are fetched and turned
    into ``<style>`` elements, and everything is wrapped in
    ``<html><head>...</head>...</html>``.

    Args:
        tree: An lxml ``ElementTree``, an lxml ``Element``, or the document
            source as ``str`` or ``bytes``. A tree or element passed in is
            modified and its root is moved into the new HTML document.

    Returns:
        The pretty-printed HTML as a string.

    Raises:
        TypeError: If ``tree`` is none of the supported types.
        ValueError: If no root element is available.
    """
    # If tree is an ElementTree, get the root element
    if isinstance(tree, etree._ElementTree):
        root = tree.getroot()
    # If it's an Element, use it as the root
    elif isinstance(tree, etree._Element):
        root = tree
    # If it's text or bytes, parse it as XML and use the root
    elif isinstance(tree, (str, bytes)):
        root = etree.fromstring(tree, parser=etree.XMLParser(recover=True))
    else:
        raise TypeError(
            "tree must be an ElementTree, an Element, str or bytes, "
            f"not {type(tree).__name__}"
        )
    if root is None:
        # An empty ElementTree, or a recovering parse that salvaged nothing.
        raise ValueError("ITML document has no root element")

    # global_includes is shared by every conversion; remember where this
    # document's entries start so includes collected for earlier documents are
    # not fetched and injected again.
    first_include = len(global_includes)

    # Convert ITML elements to HTML elements
    convert_itml_to_html(root)

    # Wrap the converted body in an HTML document
    html = etree.Element("html")
    head = etree.SubElement(html, "head")

    # For all the FontStyles included, convert them to CSS
    style_elems = fixup_includes(global_includes[first_include:])
    # Append the View CSS stub
    style_elems.append(OVERRIDE_CSS)
    for style_elem in style_elems:
        style = etree.SubElement(head, "style")
        # Level 3 = html + head + style
        style.text = indent(style_elem, 3)

    # Append the converted body to the HTML document
    html.append(root)

    # Serialize the HTML tree to a string
    html_tree = etree.ElementTree(html)
    return etree.tostring(html_tree, encoding="unicode", pretty_print=True)
def extract_protocol(tree) -> str:
    """Return the first child of the document's ``Protocol`` element as XML.

    Args:
        tree: An lxml ``ElementTree``, an lxml ``Element``, or the document
            source as ``str`` or ``bytes``.

    Returns:
        The serialized first child of the first direct child of the root
        whose tag is ``Protocol`` (in any namespace).

    Raises:
        TypeError: If ``tree`` is none of the supported types.
        ValueError: If there is no root element, no ``Protocol`` element, or
            the ``Protocol`` element has no children.
    """
    # If tree is an ElementTree, get the root element
    if isinstance(tree, etree._ElementTree):
        root = tree.getroot()
    # If it's an Element, use it as the root
    elif isinstance(tree, etree._Element):
        root = tree
    # If it's text or bytes, parse it as XML and use the root
    elif isinstance(tree, (str, bytes)):
        root = etree.fromstring(
            tree, parser=etree.XMLParser(recover=True, ns_clean=True)
        )
    else:
        raise TypeError(
            "tree must be an ElementTree, an Element, str or bytes, "
            f"not {type(tree).__name__}"
        )
    if root is None:
        # An empty ElementTree, or a recovering parse that salvaged nothing.
        raise ValueError("Document has no root element")

    # Find Protocol node, ignoring namespaces
    for elem in root:
        # Comments and processing instructions have a non-string tag.
        if isinstance(elem.tag, str) and elem.tag.rsplit("}", 1)[-1] == "Protocol":
            protocol = elem
            break
    else:
        raise ValueError("No Protocol element found in document")

    if len(protocol) == 0:
        raise ValueError("Protocol element has no children")
    return etree.tostring(protocol[0]).decode()
# with open("input.itml", "r") as f:
# itml_string = f.read()
# html_string = convert_itml_document(etree.fromstring(itml_string))
# with open("output.html", "w") as f:
# f.write(html_string)
# print("Converted input.itml to output.html")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment