Created
March 27, 2021 20:22
-
-
Save leonhard-s/50834b819e60468dc8927d5f39d517cf to your computer and use it in GitHub Desktop.
A conversion script to turn text mode Logger Pro CMBL files into Excel spreadsheets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A script for loading Logger Pro *.cmbl files into an Excel workbook. | |
Every matching file will be loaded into a separate Excel worksheet. | |
Note that this converter only works with text-based cmbl files, binary | |
files are not supported. | |
""" | |
import argparse | |
import array | |
import collections | |
import contextlib | |
import math | |
import os | |
import sys | |
from xml.etree import ElementTree as xml | |
from typing import Dict, Iterator, List, Tuple | |
import openpyxl | |
from typing_extensions import TypeAlias | |
# Type aliases
# ``array.ArrayType`` is the concrete class behind ``array.array`` instances.
Array: TypeAlias = array.ArrayType
# String (forward-reference) alias so ``OrderedDict[...]`` subscripting in
# annotations stays valid on Python versions without PEP 585 generics.
# NOTE(review): this intentionally shadows the name ``OrderedDict`` at module
# level; the real class is still reachable as ``collections.OrderedDict``.
OrderedDict: TypeAlias = 'collections.OrderedDict'
# A dataset maps column names to the numeric arrays holding their values.
DataSet: TypeAlias = 'OrderedDict[str, Array[float]]'
def _parse_cmbl(filepath: str, use_double: bool = False | |
) -> 'List[Tuple[str, DataSet]]': | |
"""Return a list of all datasets found in the file. | |
The dataset display name, as well as the column names specified | |
will be extracted and used as the keys of the returned dictionary | |
and contained datasets respectively. | |
The dataset arrays will always contain Python floats, but the | |
precision used in memory may differ. By default, this only uses C | |
floats to store the dataset values (i.e. 32 bits per value). | |
Setting the `use_double` flag to True will instead use C doubles | |
(i.e. 64 bits per value) as the internal data representation. This | |
increases representation accuracy, but also doubles memory usage. | |
Args: | |
filepath (str): The path of the file to load. | |
use_double (bool, optional): Whether to use C doubles for | |
stored data. This is what Python's `float` type uses. Only | |
to True if you require the extra precision, this doubles | |
memory usage. Defaults to False. | |
Returns: | |
List[Tuple[str, DataSet]]: A list of dataset names and their | |
corresponding datasets. | |
""" | |
try: | |
tree: xml.ElementTree = xml.parse(filepath) | |
except xml.ParseError: | |
print('Unable to parse file. Note that only text files are supported, ' | |
'the CMBL binary format is not') | |
sys.exit(1) | |
# Parse file root | |
datasets: 'List[Tuple[str, DataSet]]' = [] | |
for l1_ele in tree.getroot(): | |
if l1_ele.tag != 'DataSet': | |
continue | |
# Parse dataset | |
dataset_name = '' | |
dataset_data: DataSet = collections.OrderedDict() | |
for l2_ele in l1_ele: | |
if l2_ele.tag == 'DataSetName': | |
dataset_name = l2_ele.text.strip() | |
if l2_ele.tag != 'DataColumn': | |
continue | |
# Parse dataset columns | |
col_name = '' | |
col_data: Array[float] = array.array('d' if use_double else 'f') | |
for l3_ele in l2_ele: | |
if l3_ele.tag == 'DataObjectName': | |
col_name = l3_ele.text.strip() | |
if l3_ele.tag != 'ColumnCells': | |
continue | |
# Populate array | |
for line in l3_ele.text.splitlines(): | |
if line.strip(): | |
col_data.append(float(line)) | |
# No column name specified | |
if not col_name: | |
if not col_data: | |
continue | |
col_name = f'Column #{len(dataset_data)+2}' | |
dataset_data[col_name] = col_data | |
# No dataset name specified | |
if not dataset_name: | |
if not dataset_data: | |
continue | |
dataset_name = f'Dataset #{len(datasets)+2}' | |
datasets.append((dataset_name, dataset_data)) | |
return datasets | |
def _format_dataset_table(datasets: 'OrderedDict[str, DataSet]') -> List[str]: | |
"""Generate a list of fixed-width rows for the dataset table. | |
This gets the name, column count, and row count for each dataset | |
provided in the dictionary and returns a list of constant-width | |
rows making up a table. The returned rows do not contain any | |
leading or trailing whitespace. | |
Args: | |
datasets (OrderedDict[str, DataSet]): The datasets to list. | |
Returns: | |
List[str]: A list of lines to print. Does not include leading | |
or trailing whitespace. | |
""" | |
# Calculate the width of each column in the table | |
max_name = max((len(n)+2 for n in datasets.keys())) # +2 for quotes | |
max_cols = max((len(d) for d in datasets.values())) | |
max_rows = max((len(next(iter(d.values()))) if d else 0) | |
for _, d in datasets.items()) | |
cols_width = (int(math.log10(max_cols)) + 1) if max_cols > 0 else 1 | |
rows_width = (int(math.log10(max_rows)) + 1) if max_rows > 0 else 1 | |
# Populate the table | |
table_rows: List[str] = [] | |
for name, dataset in datasets.items(): | |
num_rows = len(next(iter(dataset.values()))) if dataset else 0 | |
name = f'"{name}"' | |
table_rows.append(f'{name:<{max_name}}: ' | |
f'{len(dataset.values()):>{cols_width}} columns, ' | |
f'{num_rows:>{rows_width}} rows') | |
return table_rows | |
def main(in_file: str, out_file: str,
         overwrite: bool, use_double: bool) -> None:
    """Convert a single CMBL file into an Excel workbook.

    Parses the input file and writes one worksheet per dataset into a
    new workbook at `out_file`. Exits the process with status 1 on an
    invalid input path or a pre-existing output file without the
    `overwrite` flag.

    Args:
        in_file (str): Path of the CMBL file to convert.
        out_file (str): Path of the Excel workbook to write.
        overwrite (bool): Whether to replace an existing output file.
        use_double (bool): Whether to store parsed values as C doubles
            (see `_parse_cmbl`).

    """
    # Check input path
    if not os.path.exists(in_file):
        print(f'Invalid path: {in_file}')
        sys.exit(1)
    if not os.path.isfile(in_file):
        print(f'Path is not a file, use --dir/-d for directories: {in_file}')
        sys.exit(1)
    # Check output path
    if os.path.isfile(out_file):
        if not overwrite:
            print('The given output file already exists. Specify another name '
                  f'or set the --force/-f flag to overwrite it: {out_file}')
            sys.exit(1)
        # The file may disappear between the isfile() check and here
        with contextlib.suppress(FileNotFoundError):
            os.remove(out_file)
    # Parse file
    print(f'Loading file \'{os.path.basename(in_file)}\'...')
    datasets: 'OrderedDict[str, DataSet]' = collections.OrderedDict(
        _parse_cmbl(in_file, use_double))
    # Display statistics
    dataset_table = _format_dataset_table(datasets)
    print(f'Datasets found: {len(datasets)}',
          *(f'  {d}' for d in dataset_table),
          sep='\n')
    # Write Excel file
    workbook = openpyxl.Workbook()
    print('In-memory workbook created')
    for index, items in enumerate(datasets.items()):
        name, dataset = items
        print(f'Populating worksheet {index+1} of {len(datasets)}...')
        # Create worksheet; a new workbook already contains one active sheet
        ws = workbook.active if index == 0 else workbook.create_sheet()
        ws.title = name
        # Write column headers
        ws.append(list(dataset.keys()))
        # Write rows: advance all column iterators in lockstep until the
        # shortest column is exhausted
        iterators: List[Iterator[float]] = [iter(a) for a in dataset.values()]
        try:
            while True:
                row = [next(i) for i in iterators]
                ws.append(row)
        except StopIteration:
            pass
    print(f'Writing workbook: {os.path.basename(out_file)}')
    workbook.save(out_file)
if __name__ == '__main__':
    _parser = argparse.ArgumentParser()
    _parser.add_argument('path', help='Path to the file to convert')
    _parser.add_argument('--output', '-o', default='', help='Path of the '
                         'output worksheet, defaults to <input>.xlsx')
    _parser.add_argument('--force', '-f', action='store_true',
                         help='If set, the output will overwrite any existing '
                         'files of the same name')
    _parser.add_argument('--dir', '-d', action='store_true',
                         help='If set, the input path is expected to be a '
                         'directory, and all files from the directory are '
                         'merged into a single Excel workbook')
    _parser.add_argument('--doubles', '-D', action='store_true',
                         help='Whether to use doubles to store the parsed '
                         'values, this effectively doubles memory usage')
    _args = _parser.parse_args()
    # Derive the output filename from the input path if not given
    _filename = _args.output or os.path.basename(_args.path)
    if not _filename:
        # Path ended in a separator; use the folder name as fallback
        _filename = os.path.split(os.path.split(_args.path)[0])[1]
    if not _filename.lower().endswith('.xlsx'):
        _filename += '.xlsx'
    if _args.dir:
        # Convert every *.cmbl file in the directory, one output workbook
        # per input file, named after the input file.
        # NOTE: match the ".cmbl" extension case-insensitively; the old
        # bare endswith('cmbl') also matched names merely ending in "cmbl".
        _files = [os.path.join(_args.path, f)
                  for f in os.listdir(_args.path)
                  if f.lower().endswith('.cmbl')]
        for _index, _file in enumerate(_files):
            print(f'Processing file {_index+1} of {len(_files)}...')
            _loop_filename = (
                f'{os.path.splitext(os.path.split(_file)[1])[0]}.xlsx')
            # Silence main()'s per-file progress output in batch mode
            with contextlib.redirect_stdout(None):
                main(_file, _loop_filename, _args.force, _args.doubles)
    else:
        main(_args.path, _filename, _args.force, _args.doubles)
    print('done')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment