Skip to content

Instantly share code, notes, and snippets.

@devilholk
Created December 1, 2019 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save devilholk/03b60bf43e588a1cbf3379f6caff654b to your computer and use it in GitHub Desktop.
Save devilholk/03b60bf43e588a1cbf3379f6caff654b to your computer and use it in GitHub Desktop.
Read an OpenDocument spreadsheet table into a list of named tuples
import xml.etree.ElementTree, re, zipfile, collections
def get_ns(tag):
    """Split an ElementTree tag of the form ``{namespace}local`` in two.

    Args:
        tag: Tag name string, e.g. ``'{urn:example}table'``.

    Returns:
        A ``(namespace, local_name)`` tuple of strings. A tag without a
        ``{namespace}`` part yields ``('', tag)``.
    """
    match = re.search(r'\{(.*)\}(.*)', tag)  # Assume only one namespace part
    if match is None:
        # No ``{...}`` part to split off: report an empty namespace instead
        # of failing on the missing match.
        return '', tag
    return match.groups()
def one_or_default(it, default=None):
    """Return the single item produced by *it*, or *default* if it is empty.

    Args:
        it: Any iterable or iterator. At most two items are consumed from it.
        default: Value returned when *it* produces no items.

    Returns:
        The only item of *it*, or *default* when there is none.

    Raises:
        ValueError: If *it* produces more than one item.
    """
    # iter() lets plain iterables (lists, tuples, ...) be passed as well as
    # iterators and generators.
    iterator = iter(it)
    try:
        result = next(iterator)
    except StopIteration:
        return default
    try:
        extra = next(iterator)
    except StopIteration:
        return result
    raise ValueError(f'Too much data: {extra}')
def any_differs(it, value):
    """Report whether at least one item of *it* is not equal to *value*.

    Iteration stops at the first differing item. An empty iterable gives
    ``False``.
    """
    return any(item != value for item in it)
def ods_to_named_tuples(filename):
    """Read a table from an OpenDocument spreadsheet as a list of named tuples.

    The table used is the first ``table:table`` element that finishes parsing
    in the archive's ``content.xml``. NOTE: with several sheets this is
    presumably the first sheet -- untested.

    The first row containing any text supplies the field names. Every later
    row containing text becomes one ``cell`` named tuple, padded with ``''``
    for cells missing at the end. Rows without text are skipped.

    Args:
        filename: Path (or file object) accepted by ``zipfile.ZipFile``.

    Returns:
        A list of ``cell`` named tuples holding the stripped cell texts.

    Raises:
        ValueError: If no table element is found, or (from ``namedtuple``)
            if the header row texts are not usable as field names.
        TypeError: If a row has more cells than the header row.
        Exception: From ``one_or_default`` when a cell holds more than one
            paragraph.
    """
    ns_map = {}
    table = None
    with zipfile.ZipFile(filename) as z:
        with z.open('content.xml') as infile:
            events = ('start-ns', 'end')
            for ev, v in xml.etree.ElementTree.iterparse(infile, events=events):
                if ev == 'start-ns':
                    # Collect prefix -> URI declarations for the later
                    # prefixed findall() calls.
                    prefix, uri = v
                    ns_map[prefix] = uri
                else:
                    ns, tag = get_ns(v.tag)
                    if tag == 'table' and ns == ns_map.get('table'):
                        table = v
                        break

    # Compare against None explicitly: Element truthiness reflects the number
    # of children, and an assert would vanish under ``python -O``.
    if table is None:
        raise ValueError(f'Could not find any table in {filename}')

    table_ns = ns_map['table']
    columns_repeated = f'{{{table_ns}}}number-columns-repeated'
    rows_repeated = f'{{{table_ns}}}number-rows-repeated'

    result = []
    cell_type = None
    for r in table.findall('table:table-row', ns_map):
        row = []
        # Blank cells are only materialised once a later cell with text shows
        # up, so trailing blanks (often a huge repeat count) are never built.
        pending_blanks = 0
        for c in r.findall('table:table-cell', ns_map):
            # itertext() also picks up text nested in child elements of the
            # paragraph, and copes with paragraphs whose .text is None.
            paragraphs = (
                ''.join(p.itertext()).strip()
                for p in c.findall('text:p', ns_map)
            )
            text = one_or_default(paragraphs, '')
            # Runs of identical cells are stored once with a repeat count.
            span = int(c.get(columns_repeated, 1))
            if not text:
                pending_blanks += span
                continue
            row.extend([''] * pending_blanks)
            pending_blanks = 0
            row.extend([text] * span)

        if not row:
            continue  # Row without any text

        # Runs of identical rows are stored once with a repeat count.
        copies = int(r.get(rows_repeated, 1))
        if cell_type is None:
            # First row with text: its cells name the fields.
            cell_type = collections.namedtuple('cell', row)
            copies -= 1
        if copies > 0:
            missing_cells = len(cell_type._fields) - len(row)
            record = cell_type(*(row + [''] * missing_cells))
            result.extend([record] * copies)
    return result
# Demo: parse the example spreadsheet and print one record per line.
records = ods_to_named_tuples('/home/devilholk/operators.ods')
for row in records:
    print(row)
#Example Output
# cell(op='floor_divide_modulus', rank='5', python_name='', base_type='binary', category='arithmetic.composit', python_symbol='', python_function='divmod', python_library='builtins')
# cell(op='exponentiation', rank='7', python_name='pow', base_type='binary', category='arithmetic.exponent', python_symbol='**', python_function='', python_library='')
# cell(op='matrix.multiply', rank='5', python_name='matmul', base_type='binary', category='arithmetic.matrix', python_symbol='@', python_function='', python_library='')
# cell(op='modulus', rank='5', python_name='mod', base_type='binary', category='arithmetic.modulus', python_symbol='%', python_function='', python_library='')
# cell(op='divide', rank='5', python_name='truediv', base_type='binary', category='arithmetic.scaling', python_symbol='/', python_function='', python_library='')
# cell(op='floor_divide', rank='5', python_name='floordiv', base_type='binary', category='arithmetic.scaling', python_symbol='//', python_function='', python_library='')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment