Skip to content

Instantly share code, notes, and snippets.

@righthandabacus
Created October 3, 2023 20:03
Show Gist options
  • Save righthandabacus/9bcc523e8ac9fd1efe568b09224f5cda to your computer and use it in GitHub Desktop.
Save righthandabacus/9bcc523e8ac9fd1efe568b09224f5cda to your computer and use it in GitHub Desktop.
Dump Excel
"""Read an Excel file -> Write a Python script that will use OpenPyXL to reproduce the Excel file
"""
import inspect
import copy
from typing import List
import openpyxl
imported = {"styles": set()}
stylecls = inspect.getmembers(openpyxl.styles, inspect.isclass) # list of (name, class)
stylecls = tuple([x for _, x in stylecls]) # tuple of style classes
typecls = tuple(x for _,x in inspect.getmembers(openpyxl.descriptors.base, inspect.isclass))
def style2code(obj) -> str:
"""Convert a style object into code"""
# convert proxy object into real
obj = copy.copy(obj)
# get object class name and parameters
cls = obj.__class__
name = cls.__name__
clsparams = inspect.signature(cls.__init__).parameters
try:
objattrs = dict(inspect.getmembers(obj))
except NotImplementedError:
objattrs = {k: getattr(obj, k) for k in clsparams if hasattr(obj, k)}
# build code
imported["styles"].add(name)
params = []
for attr, paramobj in clsparams.items():
if attr == "self":
continue # first arg in class constructor
# assume arg name in class signature has exact match in obj attrs
# otherwise skip
if attr not in objattrs:
continue
# some blacklisted attributes to skip
if (name,attr) in [('Color','index')]:
continue
value = objattrs[attr]
if isinstance(value, typecls):
continue # skip and use default
if value == paramobj.default:
continue # declutter by avoiding the default
if isinstance(value, stylecls):
params.append(f"{attr}={style2code(value)}")
else:
params.append(f"{attr}={repr(value)}")
return f"{name}({', '.join(params)})"
def dump_excel(filepath) -> List[str]:
"""Read an Excel file and print out OpenPyXL code that recreated it"""
wb = openpyxl.load_workbook(filepath)
code = []
# dump each worksheet
for n, ws in enumerate(wb.worksheets):
code.append("")
if n == 0:
code.append(f"ws = wb.active")
code.append(f"ws.title = {repr(ws.title)}")
else:
code.append(f"ws = wb.create_sheet({repr(ws.title)})")
code.extend(dump_worksheet(ws))
# prepend append
code = [
f"import datetime",
f"",
f"import openpyxl",
f"from openpyxl.styles import {', '.join(imported['styles'])}",
f"",
f"wb = openpyxl.Workbook()",
] + code + [
f"",
f"wb.save('myfile.xlsx')",
]
# TODO extract named styles
return code
def dump_worksheet(ws) -> List[str]:
"""Extract data and metadata from an OpenPyXL worksheet object and return the code that recreated it"""
code = []
# set title
# iterate over rows and cells
for row in ws.iter_rows():
for cell in row:
code.append("")
code.append(f"cell = ws[{repr(cell.coordinate)}]")
code.extend(dump_cell(cell))
# TODO handle merged cells and freezes
# set rows and columns
code.append("")
for row in ws.iter_rows():
rowid = row[0].row
size = ws.row_dimensions[rowid].height
if size is None:
continue # default row height
code.append(f"ws.row_dimensions[{rowid}].height = {size}")
for col in ws.iter_cols():
colid = col[0].column_letter
size = ws.column_dimensions[colid].width
code.append(f"ws.column_dimensions[{repr(colid)}].width = {size}")
return code
def dump_cell(cell) -> List[str]:
"""Extract cell data from OpenPyXL cell object and return the code that recreated it"""
code = []
# simple data first
for attr in "value number_format data_type comment style".split():
value = getattr(cell, attr)
if value is None:
continue # skip if empty
code.append(f"cell.{attr} = {repr(value)}")
# style data
for attr in "alignment border fill font protection".split():
value = getattr(cell, attr)
code.append(f"cell.{attr} = {style2code(value)}")
return code
file_path = "input_excel.xlsx"
code = dump_excel(file_path)
print("\n".join(code))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment