Created
March 27, 2024 16:36
-
-
Save jedfrechette/cf0c063720caa7adbff1b2a307ece483 to your computer and use it in GitHub Desktop.
Standard Library e57 metadata reader.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Standard Library Imports | |
from math import ceil | |
from struct import unpack | |
from xml.etree import ElementTree | |
def get_header(e57_path):
    """
    Decode the fixed 48-byte header at the start of an e57 file.

    The header is unpacked as little-endian data: an 8-byte signature,
    two unsigned 32-bit integers and four unsigned 64-bit integers.

    :param e57_path: Path to the e57 file to extract the header from.
    :return: dict holding the header fields, in file order:

        - ``file_signature``: The file type signature, as bytes. Shall
          contain the ASCII characters "ASTM-E57".
        - ``version_major``: The file format major version number.
          The value shall be 1.
        - ``version_minor``: The file format minor version number.
          The value shall be 0.
        - ``file_length``: The physical length of the file, in bytes.
          This length includes CRC bytes and any zero padding. Shall be
          in the open interval (0, 2^63).
        - ``xml_offset``: The physical file offset, in bytes, to the
          beginning of the XML section of the file. This value includes
          CRC bytes. Shall be in the open interval (0, 2^63).
        - ``xml_length``: The logical length, in bytes, of the XML
          section of the file, excluding CRC bytes and zero padding.
          Shall be in the open interval (0, 2^63).
        - ``page_size``: The size of a page, in bytes. The value shall
          be 1024.

        The values are returned exactly as stored; none of the "shall"
        constraints above are checked here.
    :raises struct.error: If fewer than 48 bytes could be read.
    """
    field_names = ('file_signature',
                   'version_major',
                   'version_minor',
                   'file_length',
                   'xml_offset',
                   'xml_length',
                   'page_size')
    with open(e57_path, 'rb') as e57_file:
        # 8 + 2 * 4 + 4 * 8 = 48 bytes, matching the format string below.
        raw_header = e57_file.read(48)
        fields = unpack('<8sLLQQQQ', raw_header)
    return dict(zip(field_names, fields))
def get_xml(e57_path):
    """
    Read xml metadata from e57 file.

    The header (see ``get_header``) gives the physical offset of the XML
    section, its logical length and the page size. The last 4 bytes of
    every page (the CRC bytes the header description refers to) are not
    part of the XML text. This function walks the pages starting at
    ``xml_offset``, collects exactly ``xml_length`` logical bytes while
    skipping each page's 4-byte trailer, and parses the result.

    The bytes of all pages are joined before being decoded, so a
    multi-byte UTF-8 character that straddles a page boundary is decoded
    correctly, and zero padding after the XML text is never read.

    Calling this function registers the E57 namespace as the default
    (empty-prefix) namespace with ``ElementTree``.

    :param e57_path: Path to the e57 to extract metadata from.
    :return: Root element of the parsed xml metadata. Leading and
        trailing whitespace is stripped from the ``text`` and ``tail``
        of every element.
    :raises ValueError: If the header's page size is too small to hold
        any data, or ``xml_offset`` points into a page's 4-byte trailer.
    :raises xml.etree.ElementTree.ParseError: If the collected bytes are
        not a well-formed XML document (including a truncated file).
    """
    header = get_header(e57_path)
    page_size = header['page_size']
    # Every page ends with 4 bytes that do not belong to the logical data.
    payload_size = page_size - 4
    if payload_size <= 0:
        raise ValueError(f'Invalid e57 page size: {page_size}')

    offset = header['xml_offset']      # physical position of next read
    remaining = header['xml_length']   # logical bytes still to collect
    chunks = []
    with open(e57_path, 'rb') as f_handle:
        while remaining > 0:
            # Logical bytes left in the current page before its trailer.
            # Integer arithmetic only: offsets may exceed float precision.
            page_room = payload_size - offset % page_size
            if page_room <= 0:
                raise ValueError(
                    f'Invalid e57 xml offset {offset}: '
                    'points into a page checksum')
            wanted = min(remaining, page_room)
            f_handle.seek(offset, 0)
            chunk = f_handle.read(wanted)
            chunks.append(chunk)
            if len(chunk) < wanted:
                # Truncated file: stop here and let the XML parser
                # report the incomplete document.
                break
            remaining -= wanted
            # Step over the page trailer to the start of the next page.
            offset += wanted + 4

    # Decode once, after joining, so characters split across pages survive.
    xml_text = b''.join(chunks).decode('utf-8')

    ElementTree.register_namespace(
        '', "http://www.astm.org/COMMIT/E57/2010-e57-v1.0")
    root = ElementTree.fromstring(xml_text)
    for elem in root.iter():
        if elem.text is not None:
            elem.text = elem.text.strip()
        if elem.tail is not None:
            elem.tail = elem.tail.strip()
    return root
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment