Skip to content

Instantly share code, notes, and snippets.

@prcutler
Forked from Neradoc/ElementTree.py
Created March 11, 2023 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prcutler/84f40979c516a308b166f49f85668245 to your computer and use it in GitHub Desktop.
Save prcutler/84f40979c516a308b166f49f85668245 to your computer and use it in GitHub Desktop.
CircuitPython XML example, a port of micropython-lib.
# SPDX-FileCopyrightText: Copyright (c) 2022 Neradoc
# SPDX-License-Identifier: Unlicense
import sys
from ElementTree import parse
# Parse the demo document; the file is only needed while parsing runs.
with open("some-demo.xml", "r") as fp:
    tree = parse(fp)
print(tree)
def print_sub_tree(node, depth=0):
    """Print ``node``, its attributes and, recursively, its children.

    ``depth`` is the number of spaces used to indent this node; every level
    of nesting adds two more.
    """
    indent = " " * depth
    # Quote the text so it can be told apart from a missing one.
    quoted = "" if node.text is None else '"' + node.text + '"'
    print(indent, "-", node.tag, quoted)
    for name, val in node.attrib.items():
        print(indent, "|", name, ":", val)
    for child in node:
        print_sub_tree(child, depth + 2)
# Dump the whole parsed document, starting from its root element.
print_sub_tree(tree.getroot())
# This file is part of the standard library of Pycopy project, minimalist
# and lightweight Python implementation.
#
# https://github.com/pfalcon/pycopy
# https://github.com/pfalcon/pycopy-lib
#
# The MIT License (MIT)
#
# Copyright (c) 2018-2020 Paul Sokolovsky
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import io
import xmltok2
class ParseError(Exception):
    """Raised by the parser when a closing tag does not match the open element."""
class Element:
    """One XML element: a tag, attributes, text, tail and child elements.

    ``text`` is the character data directly after the opening tag and
    ``tail`` is the character data directly after the closing tag; both are
    ``None`` when absent. Children are reachable by index and ``len()``.
    """

    def __init__(self):
        self._children = []
        self.attrib = {}
        self.tag = None
        self.text = None
        self.tail = None

    def __len__(self):
        """Return the number of direct children."""
        return len(self._children)

    def __getitem__(self, i):
        """Return the child at position ``i``."""
        return self._children[i]

    def append(self, el):
        """Add ``el`` as the last child."""
        self._children.append(el)

    def get(self, key, default=None):
        """Return attribute ``key``, or ``default`` when it is not set."""
        try:
            return self.attrib[key]
        except KeyError:
            return default

    def set(self, key, value):
        """Set attribute ``key`` to ``value``."""
        self.attrib[key] = value

    def write(self, file):
        """Serialize this element and its subtree to ``file``.

        Values are written as they are, without any escaping.
        """
        assert self.tag is not None
        file.write("<%s" % self.tag)
        for name, val in self.attrib.items():
            file.write(' {}="{}"'.format(name, val))
        file.write(">")
        if self.text is not None:
            file.write(self.text)
        for child in self._children:
            child.write(file)
        file.write("</%s>" % self.tag)
        if self.tail is not None:
            file.write(self.tail)
class ElementTree:
    """Thin wrapper that owns the root element of a parsed document."""

    def __init__(self, root):
        self.root = root

    def getroot(self):
        """Return the root element handed to the constructor."""
        return self.root

    def write(self, file):
        """Serialize the whole tree to ``file``, followed by a newline."""
        top = self.root
        top.write(file)
        file.write("\n")
def parse_el(stream):
    """Build a tree of ``Element`` objects from an XML text stream.

    The stream is fed to ``xmltok2.tokenize``. Each event is a list whose
    slot 0 is the event type; the tag or attribute name is read from slot 2,
    an attribute value from slot 3 and character data from slot 1.
    Namespace prefixes (slot 1 of tag events) are not stored on the element.

    Returns the root element, or ``None`` if the stream has no start tag.

    Raises:
        ParseError: on a closing tag that does not match the innermost open
            element, on a closing tag with no element open, or on character
            data that appears before the first element.
    """
    stack = []   # currently open elements, innermost last
    root = None
    last = None  # most recently closed element; following text is its tail
    for ev in xmltok2.tokenize(stream):
        typ = ev[0]
        if typ == xmltok2.START_TAG:
            el = Element()
            el.tag = ev[2]
            if not stack:
                root = el
            else:
                stack[-1]._children.append(el)
            stack.append(el)
            last = None
        elif typ == xmltok2.ATTR:
            # Attributes seen while no element is open belong to a
            # processing instruction (e.g. the XML declaration): ignore them.
            if stack:
                stack[-1].attrib[ev[2]] = ev[3]
        elif typ == xmltok2.TEXT:
            if last is not None:
                last.tail = ev[1]
            elif stack:
                stack[-1].text = ev[1]
            else:
                # Nothing is open and nothing was closed yet, so there is
                # no element this text could belong to.
                raise ParseError("text outside of root element")
        elif typ == xmltok2.END_TAG:
            if not stack:
                raise ParseError("unexpected closing tag: /%s" % ev[2])
            if stack[-1].tag != ev[2]:
                # The tag name is in slot 2; slot 1 is the namespace prefix.
                raise ParseError(
                    "mismatched tag: /%s (expected: /%s)" % (ev[2], stack[-1].tag)
                )
            last = stack.pop()
    return root
def parse(source):
return ElementTree(parse_el(source))
def fromstring(data):
buf = io.StringIO(data)
return parse_el(buf)
<?xml version="1.0" encoding="UTF-8"?>
<note>
<to>Tove</to>
<from>Jani</from>
<heading color="red">Reminder</heading>
<body class="important">Don't forget me this weekend!</body>
</note>
# This file is part of the standard library of Pycopy project, minimalist
# and lightweight Python implementation.
#
# https://github.com/pfalcon/pycopy
# https://github.com/pfalcon/pycopy-lib
#
# The MIT License (MIT)
#
# Copyright (c) 2018-2019 Paul Sokolovsky
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Event type markers, stored in slot 0 of every token list the tokenizer yields.
# TEXT: character data, found in slot 1.
TEXT = "TEXT"
# START_TAG: opening tag; namespace prefix in slot 1, tag name in slot 2.
START_TAG = "START_TAG"
#START_TAG_DONE = "START_TAG_DONE"
# END_TAG: closing (or self-closing) tag; same slot layout as START_TAG.
END_TAG = "END_TAG"
# PI: processing instruction such as <?xml ...?>; its target is in slot 1.
PI = "PI"
#PI_DONE = "PI_DONE"
# ATTR: attribute; namespace prefix in slot 1, name in slot 2, value in slot 3.
ATTR = "ATTR"
#ATTR_VAL = "ATTR_VAL"
class XMLSyntaxError(Exception):
    """Raised by the tokenizer when the input is not the XML syntax it expects."""
class XMLTokenizer:
    """Streaming XML tokenizer that reads its input one character at a time.

    ``f`` must provide ``read(1)`` returning a single character, or ``""``
    at end of input. ``self.c`` always holds the one-character lookahead.
    """

    def __init__(self, f):
        self.f = f
        self.c = ""
        self.nextch()

    def getch(self):
        """Return the lookahead character and advance past it."""
        c = self.c
        self.nextch()
        return c

    def eof(self):
        """Return True once the input is exhausted."""
        return self.c == ""

    def nextch(self):
        """Load the next character of the stream into the lookahead."""
        self.c = self.f.read(1)

    def skip_ws(self):
        """Advance past any whitespace."""
        while self.c.isspace():
            self.nextch()

    def isident(self):
        """Skip whitespace and tell whether an identifier starts here."""
        self.skip_ws()
        return self.c.isalpha()

    def getident(self):
        """Skip whitespace, then read letters, digits, ``_``, ``-`` and ``.``.

        Returns the characters read, possibly an empty string.
        """
        self.skip_ws()
        ident = ""
        while self.c:
            c = self.c
            if not (c.isalpha() or c.isdigit() or c in "_-."):
                break
            ident += self.getch()
        return ident

    def putnsident(self, res):
        """Read ``name`` or ``prefix:name`` into ``res[1]`` (prefix) and ``res[2]``.

        The prefix is ``""`` when the name has no colon.
        """
        ns = ""
        ident = self.getident()
        if self.c == ":":
            self.nextch()
            ns = ident
            ident = self.getident()
        res[1] = ns
        res[2] = ident

    def match(self, c):
        """Skip whitespace and consume ``c`` if it is next; report success."""
        self.skip_ws()
        if self.c == c:
            self.nextch()
            return True
        return False

    def expect(self, c):
        """Like ``match``, but raise XMLSyntaxError when ``c`` is not next."""
        if not self.match(c):
            raise XMLSyntaxError("expected %r, found %r" % (c, self.c))

    def lex_attrs_till(self, res):
        """Yield one ATTR event per ``name="value"`` pair found at this point.

        Stops at the first character that cannot start an attribute name.
        ``res`` is filled in place and yielded; ``res[3]`` is reset to
        ``None`` after each attribute.

        Raises:
            XMLSyntaxError: if ``=`` or the opening quote is missing, or the
                input ends inside the value.
        """
        while self.isident():
            res[0] = ATTR
            self.putnsident(res)
            self.expect("=")
            # XML allows whitespace on both sides of "=".
            self.skip_ws()
            quote = self.getch()
            if quote != '"' and quote != "'":
                raise XMLSyntaxError("attribute value must be quoted")
            val = ""
            while self.c != quote:
                if self.eof():
                    # Without this check the loop would spin forever, since
                    # getch() keeps returning "" at end of input.
                    raise XMLSyntaxError("unterminated attribute value")
                val += self.getch()
            self.expect(quote)
            res[3] = val
            yield res
            res[3] = None

    def tokenize(self):
        """Yield tokens as 4-slot lists ``[type, slot1, slot2, slot3]``.

        The same list object is reused for every token, so callers must read
        (or copy) its contents before asking for the next one. Whitespace in
        front of a tag or at the start of text is skipped, and comments
        produce no token.

        Raises:
            XMLSyntaxError: on malformed markup or an unterminated comment.
        """
        res = [None, None, None, None]
        while not self.eof():
            if self.match("<"):
                if self.match("/"):
                    res[0] = END_TAG
                    self.putnsident(res)
                    yield res
                    self.expect(">")
                elif self.match("?"):
                    res[0] = PI
                    res[1] = self.getident()
                    yield res
                    yield from self.lex_attrs_till(res)
                    self.expect("?")
                    self.expect(">")
                elif self.match("!"):
                    self.expect("-")
                    self.expect("-")
                    # Slide a three-character window until it reads "-->".
                    last3 = ''
                    while True:
                        if self.eof():
                            # Avoid looping forever on a comment that is
                            # never closed.
                            raise XMLSyntaxError("unterminated comment")
                        last3 = last3[-2:] + self.getch()
                        if last3 == "-->":
                            break
                else:
                    res[0] = START_TAG
                    self.putnsident(res)
                    # Remember the name: attribute events overwrite res.
                    ns = res[1]
                    tag = res[2]
                    yield res
                    yield from self.lex_attrs_till(res)
                    if self.match("/"):
                        # Self-closing tag: report the matching END_TAG too.
                        res[0] = END_TAG
                        res[1] = ns
                        res[2] = tag
                        yield res
                    self.expect(">")
            else:
                text = ""
                while self.c and self.c != "<":
                    text += self.getch()
                if text:
                    res[0] = TEXT
                    res[1] = text
                    res[2] = None
                    yield res
def gfind(gen, pred):
    """Return the first item of ``gen`` for which ``pred`` is true.

    Items are consumed from ``gen`` up to and including the match. Returns
    ``None`` when nothing matches.
    """
    for hit in (item for item in gen if pred(item)):
        return hit
    return None
def text_of(gen, tag):
    """Return the text content of a leaf tag from a tokenizer stream.

    ``tag`` is either a tag name, or a ``(namespace, name)`` tuple when the
    namespace prefix must match as well. The stream is advanced to the first
    matching START_TAG and the token right after it is taken as the text.
    """
    want_ns = isinstance(tag, tuple)

    def is_wanted(tok):
        if tok[0] != START_TAG:
            return False
        if want_ns:
            return tok[1] == tag[0] and tok[2] == tag[1]
        return tok[2] == tag

    # Consume tokens up to and including the wanted start tag.
    for tok in gen:
        if is_wanted(tok):
            break
    # Assumes no attributes
    res = next(gen)
    assert res[0] == TEXT
    return res[1]
def tokenize(file):
    """Return a generator of XML tokens read from ``file``."""
    tokenizer = XMLTokenizer(file)
    return tokenizer.tokenize()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment