Skip to content

Instantly share code, notes, and snippets.

@gacarrillor
Created August 31, 2021 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gacarrillor/e873e377a81bd91c31dbaa49a4f2f2a7 to your computer and use it in GitHub Desktop.
Save gacarrillor/e873e377a81bd91c31dbaa49a4f2f2a7 to your computer and use it in GitHub Desktop.
import re
try:
    # The cElementTree alias was removed in Python 3.9; keep it for old
    # interpreters and fall back to the standard module everywhere else.
    import xml.etree.cElementTree as et
except ImportError:
    import xml.etree.ElementTree as et
def get_models_from_xtf(xtf_path):
    """
    Get model names from an XTF file without parsing the whole file.

    Since XTF files can be very large, we follow this strategy:
    1. Read the file line by line, ignoring everything before the opening
       ``<HEADERSECTION`` tag.
    2. Collect lines until the one containing ``</HEADERSECTION>`` and stop
       reading there. The Header Section may be on one line or span several.
    3. Give only the Header Section to an XML parser and extract the models,
       so the parser never sees the rest of the file.

    :param xtf_path: Path to an XTF file
    :return: Sorted list of model names from the XTF; empty if the file has
             no complete Header Section or the section has no MODELS element
    :raises OSError: If the file cannot be opened
    :raises xml.etree.ElementTree.ParseError: If the Header Section is not
            well-formed XML
    """
    header_open = '<HEADERSECTION'
    header_close = '</HEADERSECTION>'
    # DOTALL lets the section span several lines; the non-greedy body stops
    # at the first closing tag.
    pattern = re.compile(r'<HEADERSECTION[^>]*>.*?</HEADERSECTION>', re.DOTALL)

    header_chunks = []
    # NOTE(review): assumes XTF files are UTF-8 and that model names are plain
    # ASCII identifiers, so replacing undecodable bytes cannot alter a NAME
    # value -- confirm against the INTERLIS specification.
    with open(xtf_path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if not header_chunks:
                start = line.find(header_open)
                if start == -1:
                    # Still before the header: nothing worth keeping.
                    continue
                line = line[start:]
            header_chunks.append(line)
            # Only the newest line can complete the section, so test just
            # that line instead of re-scanning everything read so far.
            if header_close in line:
                break
        else:
            # End of file reached without a complete Header Section.
            return []

    match = pattern.search(''.join(header_chunks))
    if match is None:
        return []

    root = et.fromstring(match.group(0))
    models = root.find('MODELS')
    # Compare against None explicitly: an Element's truth value reflects its
    # number of children, not whether find() located it.
    if models is None:
        return []

    return sorted(
        child.attrib['NAME'] for child in models if 'NAME' in child.attrib
    )
import xml.etree.ElementTree as ET
def transfer_file_models(xtf_file_path):
    """
    Collect the models declared in an XTF file by parsing the whole document.

    :param xtf_file_path: Path to an XTF file
    :return: List with one ``{'name': <NAME attribute>}`` dict per
             INTERLIS 2.3 ``MODEL`` element, in document order. If the file
             is not well-formed XML, a message is printed and an empty list
             is returned.
    """
    model_tag = '{http://www.interlis.ch/INTERLIS2.3}MODEL'

    try:
        tree = ET.parse(xtf_file_path)
    except ET.ParseError as e:
        print('Could not parse transferfile file `{file}` ({exception})'.format(
            file=xtf_file_path, exception=str(e)))
        return []

    # Element.get() yields None when a MODEL element has no NAME attribute.
    return [{'name': node.get('NAME')} for node in tree.getroot().iter(model_tag)]
xtf_path = '/docs/tr/ai/insumos/xtfs/my_data.xtf'
import time

# Benchmark both extraction strategies against the same file. Each timed
# region covers the extraction call and the print of the model count.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals; time() is a wall
# clock that can jump if the system time is adjusted.
for message, extract_models in (
    ('Option STC, execution time in seconds: {}', get_models_from_xtf),
    ('Option Opengis.ch, execution time in seconds: {}', transfer_file_models),
):
    start = time.perf_counter()
    print(len(extract_models(xtf_path)))
    end = time.perf_counter()
    print(message.format(end - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment