Skip to content

Instantly share code, notes, and snippets.

@mh0w
Created June 26, 2024 15:49
Show Gist options
  • Save mh0w/8365e880fe640b830860354da33f914e to your computer and use it in GitHub Desktop.
Save mh0w/8365e880fe640b830860354da33f914e to your computer and use it in GitHub Desktop.
Extract table schemas from an EPIDD
"""
Extract table schemas from an EPIDD.
Requirements: pip install pandas xlsxwriter
"""
import pandas as pd
from docx.api import Document
doc_path = "C:/path/to/my.docx"
out_path = "C:/path/to/my.xlsx"
document = Document(doc_path)
my_dict = {}
j = 0
with pd.ExcelWriter(out_path) as my_workbook:
for i in (
list(range(23, 67, 2))
+ list(range(68, 72, 2))
+ list(range(73, 83, 2))
+ list(range(84, 108, 2))
):
j += 1
print(f"5.{j}")
my_dict[f"5.{j}"] = document.tables[i]
data = []
keys = None
for i, row in enumerate(my_dict[f"5.{j}"].rows):
text = (cell.text for cell in row.cells)
if i == 0:
keys = tuple(text)
continue
row_data = dict(zip(keys, text))
data.append(row_data)
my_dict[f"5.{j}"] = pd.DataFrame(data)
# export to excel file
my_dict[f"5.{j}"].to_excel(my_workbook, sheet_name=f"5.{j}", index=False)
# Resize the worksheet columns
my_workbook.sheets[f"5.{j}"].set_column(1, 1, 30)
my_workbook.sheets[f"5.{j}"].set_column(2, 2, 20)
my_workbook.sheets[f"5.{j}"].set_column(3, 3, 115)
my_workbook.sheets[f"5.{j}"].set_column(4, 4, 30)
print(f"See '{out_path}'")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment