Created
October 3, 2023 20:03
-
-
Save righthandabacus/9bcc523e8ac9fd1efe568b09224f5cda to your computer and use it in GitHub Desktop.
Dump Excel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Read an Excel file -> Write a Python script that will use OpenPyXL to reproduce the Excel file | |
""" | |
import inspect | |
import copy | |
from typing import List | |
import openpyxl | |
imported = {"styles": set()} | |
stylecls = inspect.getmembers(openpyxl.styles, inspect.isclass) # list of (name, class) | |
stylecls = tuple([x for _, x in stylecls]) # tuple of style classes | |
typecls = tuple(x for _,x in inspect.getmembers(openpyxl.descriptors.base, inspect.isclass)) | |
def style2code(obj) -> str: | |
"""Convert a style object into code""" | |
# convert proxy object into real | |
obj = copy.copy(obj) | |
# get object class name and parameters | |
cls = obj.__class__ | |
name = cls.__name__ | |
clsparams = inspect.signature(cls.__init__).parameters | |
try: | |
objattrs = dict(inspect.getmembers(obj)) | |
except NotImplementedError: | |
objattrs = {k: getattr(obj, k) for k in clsparams if hasattr(obj, k)} | |
# build code | |
imported["styles"].add(name) | |
params = [] | |
for attr, paramobj in clsparams.items(): | |
if attr == "self": | |
continue # first arg in class constructor | |
# assume arg name in class signature has exact match in obj attrs | |
# otherwise skip | |
if attr not in objattrs: | |
continue | |
# some blacklisted attributes to skip | |
if (name,attr) in [('Color','index')]: | |
continue | |
value = objattrs[attr] | |
if isinstance(value, typecls): | |
continue # skip and use default | |
if value == paramobj.default: | |
continue # declutter by avoiding the default | |
if isinstance(value, stylecls): | |
params.append(f"{attr}={style2code(value)}") | |
else: | |
params.append(f"{attr}={repr(value)}") | |
return f"{name}({', '.join(params)})" | |
def dump_excel(filepath) -> List[str]: | |
"""Read an Excel file and print out OpenPyXL code that recreated it""" | |
wb = openpyxl.load_workbook(filepath) | |
code = [] | |
# dump each worksheet | |
for n, ws in enumerate(wb.worksheets): | |
code.append("") | |
if n == 0: | |
code.append(f"ws = wb.active") | |
code.append(f"ws.title = {repr(ws.title)}") | |
else: | |
code.append(f"ws = wb.create_sheet({repr(ws.title)})") | |
code.extend(dump_worksheet(ws)) | |
# prepend append | |
code = [ | |
f"import datetime", | |
f"", | |
f"import openpyxl", | |
f"from openpyxl.styles import {', '.join(imported['styles'])}", | |
f"", | |
f"wb = openpyxl.Workbook()", | |
] + code + [ | |
f"", | |
f"wb.save('myfile.xlsx')", | |
] | |
# TODO extract named styles | |
return code | |
def dump_worksheet(ws) -> List[str]: | |
"""Extract data and metadata from an OpenPyXL worksheet object and return the code that recreated it""" | |
code = [] | |
# set title | |
# iterate over rows and cells | |
for row in ws.iter_rows(): | |
for cell in row: | |
code.append("") | |
code.append(f"cell = ws[{repr(cell.coordinate)}]") | |
code.extend(dump_cell(cell)) | |
# TODO handle merged cells and freezes | |
# set rows and columns | |
code.append("") | |
for row in ws.iter_rows(): | |
rowid = row[0].row | |
size = ws.row_dimensions[rowid].height | |
if size is None: | |
continue # default row height | |
code.append(f"ws.row_dimensions[{rowid}].height = {size}") | |
for col in ws.iter_cols(): | |
colid = col[0].column_letter | |
size = ws.column_dimensions[colid].width | |
code.append(f"ws.column_dimensions[{repr(colid)}].width = {size}") | |
return code | |
def dump_cell(cell) -> List[str]: | |
"""Extract cell data from OpenPyXL cell object and return the code that recreated it""" | |
code = [] | |
# simple data first | |
for attr in "value number_format data_type comment style".split(): | |
value = getattr(cell, attr) | |
if value is None: | |
continue # skip if empty | |
code.append(f"cell.{attr} = {repr(value)}") | |
# style data | |
for attr in "alignment border fill font protection".split(): | |
value = getattr(cell, attr) | |
code.append(f"cell.{attr} = {style2code(value)}") | |
return code | |
file_path = "input_excel.xlsx" | |
code = dump_excel(file_path) | |
print("\n".join(code)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment