# dionhaefner/vtktreebuilder.py

## vtktreebuilder.py
from xml.etree.ElementTree import XMLParser, TreeBuilder
import struct
import base64
import re

class VTKTreeBuilder(TreeBuilder):
    """
    Subclass of TreeBuilder that decodes ASCII or base64-encoded VTK DataArrays.
    When the end-tag of a base64-encoded DataArray is encountered, the XML string
    is decoded and unpacked according to its data type and the VTK specification,
    and the values are stored as space-separated text.

    For ASCII-encoded DataArrays only the whitespace is normalised. Text that
    is not inside a DataArray is not recorded at all.

    VTK specification found at http://www.vtk.org/Wiki/VTK_XML_Formats
    """

    # Buffer codes from https://docs.python.org/2/library/struct.html
    buffers = {"Int8": "b", "UInt8": "B", "Int16": "h", "UInt16": "H",
               "Int32": "i", "UInt32": "I", "Int64": "q", "UInt64": "Q",
               "Float32": "f", "Float64": "d"}
    byteorders = {"LittleEndian": "<", "BigEndian": ">"}

    # Element opened by the most recent start tag (None before the first one).
    elem = None

    def start(self, tag, attrib):
        """
        Open an element and read the encoding settings from the VTKFile tag.

        Returns the element created by the base class.

        Raises ValueError if the VTKFile tag has no version attribute or a
        missing/unknown byte_order, or if a DataArray has a format other than
        "binary" or "ascii". A DataArray without any format attribute raises
        KeyError.
        """
        self.elem = super(VTKTreeBuilder, self).start(tag, attrib)
        # Text chunks of the element just opened. They are joined once in
        # end() because repeated string concatenation is quadratic.
        self._chunks = []
        self.array_data = ""
        if tag == "VTKFile":
            if "version" not in attrib:
                raise ValueError("Missing version attribute in VTKFile tag")
            self.split_header = attrib["version"] == "0.1"
            # .get() so that a missing attribute is reported as ValueError
            # as well, instead of failing while the message is formatted
            byte_order = attrib.get("byte_order")
            if byte_order not in self.byteorders:
                raise ValueError("Unknown byteorder {}".format(byte_order))
            self.byteorder = self.byteorders[byte_order]
            # default header in older VTK versions is UInt32
            self.header_type = self.buffers.get(attrib.get("header_type"), "I")
        elif tag == "DataArray":
            if attrib["format"] not in ("binary", "ascii"):
                raise ValueError("VTK data format must be ascii or binary (base64). Got: {}".format(attrib["format"]))
        return self.elem

    def data(self, data):
        """
        Just record the data instead of writing it immediately. All data in VTK
        files is contained in DataArray tags, so no need to record anything if
        we are not currently in a DataArray.

        """
        if self.elem is not None and self.elem.tag == "DataArray":
            self._chunks.append(data)

    def end(self, tag):
        """
        Detect the end-tag of a DataArray and write its normalised text.

        Raises ValueError if the byte count announced in a binary header is
        not a multiple of the item size, and KeyError for an unknown data
        type. Errors raised by struct while unpacking are not caught.
        """
        if tag != "DataArray":
            return super(VTKTreeBuilder, self).end(tag)

        text = "".join(self._chunks)
        if self.elem.attrib["format"] == "binary":
            # base64 carries no meaningful whitespace; removing all of it
            # keeps the header offset used for slicing exact
            encoded = re.sub(r"\s+", "", text)
            values = self._decode_binary(encoded, self.elem.attrib["type"])
            self.array_data = " ".join([str(v) for v in values])
        else:
            # collapse whitespace runs, drop leading/trailing whitespace
            self.array_data = re.sub(r"\s+", " ", text.strip())

        # write data to element
        super(VTKTreeBuilder, self).data(self.array_data)
        return super(VTKTreeBuilder, self).end(tag)

    def _decode_binary(self, encoded, type_name):
        """
        Decode one whitespace-free base64 DataArray payload.

        Returns the tuple of unpacked values of VTK type `type_name`.
        """
        cbuf = self.buffers[type_name]
        header_fmt = self.byteorder + self.header_type
        # explicit byte order selects struct's standard sizes, which is what
        # the unpack calls below use as well
        item_size = struct.calcsize(self.byteorder + cbuf)

        if self.split_header:  # vtk version 0.1, encoding header and content separately
            header_size = len(base64.b64encode(struct.pack(header_fmt, 0)))
            header = base64.b64decode(encoded[:header_size])
            content = base64.b64decode(encoded[header_size:])
            skip = 0
        else:  # vtk version 1.0, encoding header and content together
            header = content = base64.b64decode(encoded)
            skip = 1

        # binary encoded VTK files start with an integer giving the number of bytes to follow
        data_len = struct.unpack_from(header_fmt, header)[0]
        if data_len % item_size:
            raise ValueError("DataArray header announces {} bytes, which is not a multiple of the {}-byte item size of {}".format(data_len, item_size, type_name))

        # a repeat count avoids building a format string with one character per value
        body_fmt = "{}{}".format(data_len // item_size, cbuf)
        if skip:
            return struct.unpack(header_fmt + body_fmt, content)[1:]
        return struct.unpack(self.byteorder + body_fmt, content)


def compare_arrays(name, text1, text2):
    """
    Compare the whitespace-separated values of two DataArray texts.

    `name` is only used to label the messages; `text2` may be None or empty.
    Returns the list of report lines: a single "matches" line if the value
    strings are identical, otherwise a size line (if the lengths differ)
    and/or a value line giving the largest absolute difference over the
    overlapping part of both arrays.
    """
    arr1 = text1.split()
    arr2 = (text2 or "").split()
    if arr1 == arr2:
        return ["DataArray {} matches".format(name)]

    report = []
    if len(arr1) != len(arr2):
        report.append("DataArray {} does not match in size. Got: {}, {}".format(name, len(arr1), len(arr2)))
    # absolute differences, so that deviations in either direction count
    errors = [abs(float(v1) - float(v2)) for v1, v2 in zip(arr1, arr2)]
    if errors:  # nothing to compare if one of the arrays is empty
        report.append("DataArray {} does not match in value. Maximum error: {:.1e}".format(name, max(errors)))
    return report


if __name__ == "__main__":
    # Test if two given VTK files (one binary, one ASCII) contain the same values.
    import sys
    import xml.etree.ElementTree as ET
    tree1 = ET.parse(sys.argv[1], parser=XMLParser(target=VTKTreeBuilder()))
    tree2 = ET.parse(sys.argv[2], parser=XMLParser(target=VTKTreeBuilder()))
    # pair the DataArrays of both files in document order
    for child1, child2 in zip(tree1.iter("DataArray"), tree2.iter("DataArray")):
        if not child1.text:
            continue
        # not every DataArray carries a Name attribute
        label = child1.attrib.get("Name", "<unnamed>")
        for line in compare_arrays(label, child1.text, child2.text):
            print(line)
	from xml.etree.ElementTree import XMLParser, TreeBuilder
	import struct
	import base64
	import re

	class VTKTreeBuilder(TreeBuilder):
	    """
	    Subclass of TreeBuilder that decodes ASCII or base64-encoded VTK DataArrays.
	    When the end-tag of a base64-encoded DataArray is encountered, the XML string
	    is decoded and unpacked according to its data type and the VTK specification,
	    and the values are stored as space-separated text.

	    For ASCII-encoded DataArrays only the whitespace is normalised. Text that
	    is not inside a DataArray is not recorded at all.

	    VTK specification found at http://www.vtk.org/Wiki/VTK_XML_Formats
	    """

	    # Buffer codes from https://docs.python.org/2/library/struct.html
	    buffers = {"Int8": "b", "UInt8": "B", "Int16": "h", "UInt16": "H",
	               "Int32": "i", "UInt32": "I", "Int64": "q", "UInt64": "Q",
	               "Float32": "f", "Float64": "d"}
	    byteorders = {"LittleEndian": "<", "BigEndian": ">"}

	    # Element opened by the most recent start tag (None before the first one).
	    elem = None

	    def start(self, tag, attrib):
	        """
	        Open an element and read the encoding settings from the VTKFile tag.

	        Returns the element created by the base class.

	        Raises ValueError if the VTKFile tag has no version attribute or a
	        missing/unknown byte_order, or if a DataArray has a format other than
	        "binary" or "ascii". A DataArray without any format attribute raises
	        KeyError.
	        """
	        self.elem = super(VTKTreeBuilder, self).start(tag, attrib)
	        # Text chunks of the element just opened. They are joined once in
	        # end() because repeated string concatenation is quadratic.
	        self._chunks = []
	        self.array_data = ""
	        if tag == "VTKFile":
	            if "version" not in attrib:
	                raise ValueError("Missing version attribute in VTKFile tag")
	            self.split_header = attrib["version"] == "0.1"
	            # .get() so that a missing attribute is reported as ValueError
	            # as well, instead of failing while the message is formatted
	            byte_order = attrib.get("byte_order")
	            if byte_order not in self.byteorders:
	                raise ValueError("Unknown byteorder {}".format(byte_order))
	            self.byteorder = self.byteorders[byte_order]
	            # default header in older VTK versions is UInt32
	            self.header_type = self.buffers.get(attrib.get("header_type"), "I")
	        elif tag == "DataArray":
	            if attrib["format"] not in ("binary", "ascii"):
	                raise ValueError("VTK data format must be ascii or binary (base64). Got: {}".format(attrib["format"]))
	        return self.elem

	    def data(self, data):
	        """
	        Just record the data instead of writing it immediately. All data in VTK
	        files is contained in DataArray tags, so no need to record anything if
	        we are not currently in a DataArray.

	        """
	        if self.elem is not None and self.elem.tag == "DataArray":
	            self._chunks.append(data)

	    def end(self, tag):
	        """
	        Detect the end-tag of a DataArray and write its normalised text.

	        Raises ValueError if the byte count announced in a binary header is
	        not a multiple of the item size, and KeyError for an unknown data
	        type. Errors raised by struct while unpacking are not caught.
	        """
	        if tag != "DataArray":
	            return super(VTKTreeBuilder, self).end(tag)

	        text = "".join(self._chunks)
	        if self.elem.attrib["format"] == "binary":
	            # base64 carries no meaningful whitespace; removing all of it
	            # keeps the header offset used for slicing exact
	            encoded = re.sub(r"\s+", "", text)
	            values = self._decode_binary(encoded, self.elem.attrib["type"])
	            self.array_data = " ".join([str(v) for v in values])
	        else:
	            # collapse whitespace runs, drop leading/trailing whitespace
	            self.array_data = re.sub(r"\s+", " ", text.strip())

	        # write data to element
	        super(VTKTreeBuilder, self).data(self.array_data)
	        return super(VTKTreeBuilder, self).end(tag)

	    def _decode_binary(self, encoded, type_name):
	        """
	        Decode one whitespace-free base64 DataArray payload.

	        Returns the tuple of unpacked values of VTK type `type_name`.
	        """
	        cbuf = self.buffers[type_name]
	        header_fmt = self.byteorder + self.header_type
	        # explicit byte order selects struct's standard sizes, which is what
	        # the unpack calls below use as well
	        item_size = struct.calcsize(self.byteorder + cbuf)

	        if self.split_header:  # vtk version 0.1, encoding header and content separately
	            header_size = len(base64.b64encode(struct.pack(header_fmt, 0)))
	            header = base64.b64decode(encoded[:header_size])
	            content = base64.b64decode(encoded[header_size:])
	            skip = 0
	        else:  # vtk version 1.0, encoding header and content together
	            header = content = base64.b64decode(encoded)
	            skip = 1

	        # binary encoded VTK files start with an integer giving the number of bytes to follow
	        data_len = struct.unpack_from(header_fmt, header)[0]
	        if data_len % item_size:
	            raise ValueError("DataArray header announces {} bytes, which is not a multiple of the {}-byte item size of {}".format(data_len, item_size, type_name))

	        # a repeat count avoids building a format string with one character per value
	        body_fmt = "{}{}".format(data_len // item_size, cbuf)
	        if skip:
	            return struct.unpack(header_fmt + body_fmt, content)[1:]
	        return struct.unpack(self.byteorder + body_fmt, content)


	def compare_arrays(name, text1, text2):
	    """
	    Compare the whitespace-separated values of two DataArray texts.

	    `name` is only used to label the messages; `text2` may be None or empty.
	    Returns the list of report lines: a single "matches" line if the value
	    strings are identical, otherwise a size line (if the lengths differ)
	    and/or a value line giving the largest absolute difference over the
	    overlapping part of both arrays.
	    """
	    arr1 = text1.split()
	    arr2 = (text2 or "").split()
	    if arr1 == arr2:
	        return ["DataArray {} matches".format(name)]

	    report = []
	    if len(arr1) != len(arr2):
	        report.append("DataArray {} does not match in size. Got: {}, {}".format(name, len(arr1), len(arr2)))
	    # absolute differences, so that deviations in either direction count
	    errors = [abs(float(v1) - float(v2)) for v1, v2 in zip(arr1, arr2)]
	    if errors:  # nothing to compare if one of the arrays is empty
	        report.append("DataArray {} does not match in value. Maximum error: {:.1e}".format(name, max(errors)))
	    return report


	if __name__ == "__main__":
	    # Test if two given VTK files (one binary, one ASCII) contain the same values.
	    import sys
	    import xml.etree.ElementTree as ET
	    tree1 = ET.parse(sys.argv[1], parser=XMLParser(target=VTKTreeBuilder()))
	    tree2 = ET.parse(sys.argv[2], parser=XMLParser(target=VTKTreeBuilder()))
	    # pair the DataArrays of both files in document order
	    for child1, child2 in zip(tree1.iter("DataArray"), tree2.iter("DataArray")):
	        if not child1.text:
	            continue
	        # not every DataArray carries a Name attribute
	        label = child1.attrib.get("Name", "<unnamed>")
	        for line in compare_arrays(label, child1.text, child2.text):
	            print(line)