Created
December 1, 2019 14:29
-
-
Save devilholk/03b60bf43e588a1cbf3379f6caff654b to your computer and use it in GitHub Desktop.
Read a table from an OpenDocument spreadsheet (.ods) into a list of named tuples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.ElementTree, re, zipfile, collections | |
def get_ns(tag):
    """Split an ElementTree-style tag ``{namespace}name`` into its two parts.

    Args:
        tag: Tag string, normally of the form ``'{namespace-uri}local-name'``.

    Returns:
        A ``(namespace, local_name)`` tuple of strings. A tag without a
        ``{namespace}`` prefix yields ``('', tag)`` instead of raising
        ``IndexError``.
    """
    # Same pattern as before; only the first occurrence is considered.
    match = re.search(r'\{(.*)\}(.*)', tag)
    if match is None:
        # Un-namespaced tag: report an empty namespace rather than crashing.
        return '', tag
    return match.groups()
def one_or_default(it, default=None):
    """Return the single item of ``it``, or ``default`` when it is empty.

    Args:
        it: Any iterable (a list, a generator, an iterator, ...).
        default: Value returned when ``it`` yields nothing.

    Returns:
        The only item produced by ``it``, or ``default`` if there is none.

    Raises:
        ValueError: If ``it`` yields more than one item. ``ValueError`` is a
            subclass of ``Exception``, so ``except Exception`` still works.
    """
    # iter() lets plain iterables (e.g. lists) be passed, not only iterators.
    iterator = iter(it)
    # Private sentinel so that None (or any value) is a legitimate item.
    missing = object()

    result = next(iterator, missing)
    if result is missing:
        return default

    extra = next(iterator, missing)
    if extra is not missing:
        raise ValueError(f'Too much data: {extra}')
    return result
def any_differs(it, value):
    """Tell whether at least one item of ``it`` is not equal to ``value``.

    Returns ``False`` for an empty iterable.
    """
    return any(item != value for item in it)
def ods_to_named_tuples(filename):
    """Read the first table of an OpenDocument spreadsheet as named tuples.

    The first non-empty row supplies the field names of a ``cell`` named
    tuple; every later non-empty row becomes one ``cell`` instance whose
    values are the stripped cell texts. Rows shorter than the header are
    padded with empty strings, and completely empty rows are skipped.

    Args:
        filename: Path to the ``.ods`` file, or a binary file-like object
            (anything ``zipfile.ZipFile`` accepts).

    Returns:
        A list of ``cell`` named tuples; empty if the table has no data rows.

    Raises:
        ValueError: If ``content.xml`` contains no table element, or (from
            ``collections.namedtuple``) if the header texts are not valid,
            unique field names.
        TypeError: If a data row has more cells than the header row.
    """
    # Match on the ODF namespace URIs instead of whatever prefixes the
    # document happens to declare.
    table_ns = 'urn:oasis:names:tc:opendocument:xmlns:table:1.0'
    text_ns = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0'
    table_tag = f'{{{table_ns}}}table'
    row_tag = f'{{{table_ns}}}table-row'
    cell_tag = f'{{{table_ns}}}table-cell'
    paragraph_tag = f'{{{text_ns}}}p'
    columns_repeated = f'{{{table_ns}}}number-columns-repeated'
    rows_repeated = f'{{{table_ns}}}number-rows-repeated'

    table = None
    with zipfile.ZipFile(filename) as archive:
        with archive.open('content.xml') as infile:
            # Stop at the first table element that is completely parsed.
            for _event, element in xml.etree.ElementTree.iterparse(infile, events=('end',)):
                if element.tag == table_tag:
                    table = element
                    break

    # Explicit None check: an Element without children is falsy, and an
    # ``assert`` would be stripped when running with ``-O``.
    if table is None:
        raise ValueError(f'Could not find any table in {filename!r}')

    result = []
    cell_type = None
    for row_element in table.findall(row_tag):
        row = []
        # Empty cells are only counted until a non-empty cell follows, so the
        # huge trailing "repeated" filler cells are never materialised.
        pending_blanks = 0
        for cell in row_element.findall(cell_tag):
            # itertext() also collects text nested in spans/links and copes
            # with empty paragraphs; several paragraphs form a multi-line cell.
            paragraphs = [''.join(p.itertext()).strip() for p in cell.findall(paragraph_tag)]
            text = '\n'.join(paragraphs).strip()
            # One element may stand for several identical adjacent cells.
            repeat = int(cell.get(columns_repeated, 1))
            if text:
                row.extend([''] * pending_blanks)
                pending_blanks = 0
                row.extend([text] * repeat)
            else:
                pending_blanks += repeat

        if not row:
            # Skip empty rows before looking at their (possibly huge) repeat count.
            continue

        # One element may likewise stand for several identical adjacent rows.
        for _ in range(int(row_element.get(rows_repeated, 1))):
            if cell_type is None:
                cell_type = collections.namedtuple('cell', row)
            else:
                missing_cells = len(cell_type._fields) - len(row)
                result.append(cell_type(*(row + [''] * missing_cells)))
    return result
# Demo: print every row of a spreadsheet. Guarded so that importing this
# module does not try to open a file as a side effect.
if __name__ == '__main__':
    import sys  # local import: only the demo needs it

    # Keep the original sample path as the default, but allow another file to
    # be given as the first command-line argument.
    ods_path = sys.argv[1] if len(sys.argv) > 1 else '/home/devilholk/operators.ods'
    for row in ods_to_named_tuples(ods_path):
        print(row)
#Example Output | |
# cell(op='floor_divide_modulus', rank='5', python_name='', base_type='binary', category='arithmetic.composit', python_symbol='', python_function='divmod', python_library='builtins') | |
# cell(op='exponentiation', rank='7', python_name='pow', base_type='binary', category='arithmetic.exponent', python_symbol='**', python_function='', python_library='') | |
# cell(op='matrix.multiply', rank='5', python_name='matmul', base_type='binary', category='arithmetic.matrix', python_symbol='@', python_function='', python_library='') | |
# cell(op='modulus', rank='5', python_name='mod', base_type='binary', category='arithmetic.modulus', python_symbol='%', python_function='', python_library='') | |
# cell(op='divide', rank='5', python_name='truediv', base_type='binary', category='arithmetic.scaling', python_symbol='/', python_function='', python_library='') | |
# cell(op='floor_divide', rank='5', python_name='floordiv', base_type='binary', category='arithmetic.scaling', python_symbol='//', python_function='', python_library='') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment