Skip to content

Instantly share code, notes, and snippets.

@dionhaefner
Last active October 10, 2016 10:50
Show Gist options
  • Save dionhaefner/f62e4d3f717da660af02beb9b4ea0f61 to your computer and use it in GitHub Desktop.
Save dionhaefner/f62e4d3f717da660af02beb9b4ea0f61 to your computer and use it in GitHub Desktop.
from xml.etree.ElementTree import XMLParser, TreeBuilder
import struct
import base64
import re
class VTKTreeBuilder(TreeBuilder):
    """
    Subclass of TreeBuilder that decodes ASCII or base64-encoded VTK DataArrays.

    When the end-tag of a DataArray is encountered, the character data that
    was collected for it is normalized and written to the element as text:

    * ``format="ascii"``: runs of whitespace are collapsed to single spaces.
    * ``format="binary"``: the base64 text is decoded, unpacked according to
      the array's ``type`` and the file's byte order / header type, and
      re-written as space-separated values.

    Character data outside of DataArray elements is discarded.

    VTK specification found at http://www.vtk.org/Wiki/VTK_XML_Formats
    """

    # Buffer codes from https://docs.python.org/2/library/struct.html
    buffers = {"Int8": "b", "UInt8": "B", "Int16": "h", "UInt16": "H",
               "Int32": "i", "UInt32": "I", "Int64": "q", "UInt64": "Q",
               "Float32": "f", "Float64": "d"}
    byteorders = {"LittleEndian": "<", "BigEndian": ">"}

    def start(self, tag, attrib):
        """
        Open an element and record the file-level settings of a VTKFile tag.

        Raises ValueError if the VTKFile tag has no ``version`` attribute or
        a missing/unknown ``byte_order``, or if a DataArray has a ``format``
        other than "ascii" or "binary".
        """
        self.elem = super(VTKTreeBuilder, self).start(tag, attrib)
        self.array_data = ""
        # Character data arrives in pieces; collect them and join once.
        self._chunks = []
        if tag == "VTKFile":
            if "version" not in attrib:
                raise ValueError("Missing version attribute in VTKFile tag")
            # Version 0.1 files base64-encode the length header separately.
            self.split_header = attrib["version"] == "0.1"
            byte_order = attrib.get("byte_order")
            if byte_order not in self.byteorders:
                raise ValueError("Unknown byteorder {}".format(byte_order))
            self.byteorder = self.byteorders[byte_order]
            # default header in older VTK versions is UInt32
            self.header_type = self.buffers.get(attrib.get("header_type"), "I")
        elif tag == "DataArray":
            if attrib["format"] not in ("binary", "ascii"):
                raise ValueError("VTK data format must be ascii or binary (base64). Got: {}".format(attrib["format"]))
        return self.elem

    def data(self, data):
        """
        Just record the data instead of writing it immediately. All data in VTK
        files is contained in DataArray tags, so no need to record anything if
        we are not currently in a DataArray.
        """
        if self.elem.tag == "DataArray":
            self._chunks.append(data)

    def end(self, tag):
        """
        Close an element; for a DataArray, first write its decoded text.

        The final text is also kept in ``self.array_data``.
        """
        if tag != "DataArray":
            return super(VTKTreeBuilder, self).end(tag)
        text = "".join(self._chunks)
        self._chunks = []
        if self.elem.attrib["format"] == "binary":
            cbuf = self.buffers[self.elem.attrib["type"]]
            # Whitespace is not part of the base64 payload; drop all of it so
            # that slicing off the header works on exact character offsets.
            values = self._decode_binary("".join(text.split()), cbuf)
            self.array_data = " ".join(str(v) for v in values)
        else:
            # collapse whitespace runs and trim both ends
            self.array_data = re.sub(r"\s+", " ", text.strip())
        # write data to element
        super(VTKTreeBuilder, self).data(self.array_data)
        return super(VTKTreeBuilder, self).end(tag)

    def _decode_binary(self, encoded, cbuf):
        """
        Decode whitespace-free base64 text into a tuple of values.

        ``cbuf`` is the struct type code of the array items. The text starts
        with an integer header giving the number of payload bytes to follow.
        Raises ValueError if that byte count is not a multiple of the item
        size, and struct.error if header or payload have the wrong length.
        """
        if not encoded:
            return ()
        header_fmt = self.byteorder + self.header_type
        header_bytes = struct.calcsize(header_fmt)
        item_bytes = struct.calcsize(self.byteorder + cbuf)
        if self.split_header:
            # vtk version 0.1, encoding header and content separately
            header_chars = len(base64.b64encode(b"\0" * header_bytes))
            header = base64.b64decode(encoded[:header_chars])
            payload = base64.b64decode(encoded[header_chars:])
        else:
            # vtk version 1.0, encoding header and content together
            raw = base64.b64decode(encoded)
            header, payload = raw[:header_bytes], raw[header_bytes:]
        data_len = struct.unpack(header_fmt, header)[0]
        count = data_len // item_bytes
        if data_len != count * item_bytes:
            raise ValueError(
                "DataArray byte count {} is not a multiple of the {}-byte item size".format(data_len, item_bytes))
        # A repeat count keeps the format string short for large arrays.
        return struct.unpack("{}{}{}".format(self.byteorder, count, cbuf), payload)
if __name__ == "__main__":
"""
Test if two given VTK files (one binary, one ASCII) contain the same values.
"""
import sys
import xml.etree.ElementTree as ET
parser1 = ET.parse(sys.argv[1],parser=XMLParser(target=VTKTreeBuilder()))
parser2 = ET.parse(sys.argv[2],parser=XMLParser(target=VTKTreeBuilder()))
for child1, child2 in zip(parser1.iter(),parser2.iter()):
if not child1.text:
continue
arr1 = child1.text.split()
arr2 = child2.text.split()
if arr1 == arr2:
print("DataArray {} matches".format(child1.attrib["Name"]))
else:
if len(arr1) != len(arr2):
print("DataArray {} does not match in size. Got: {}, {}".format(child1.attrib["Name"],len(arr1),len(arr2)))
maxerr = max([float(v1) - float(v2) for v1, v2 in zip(arr1, arr2)])
print("DataArray {} does not match in value. Maximum error: {:.1e}".format(child1.attrib["Name"],maxerr))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment