Skip to content

Instantly share code, notes, and snippets.

@yeiichi
Last active July 15, 2024 01:42
Show Gist options
  • Save yeiichi/f433e354704d7af3567de88567b7088b to your computer and use it in GitHub Desktop.
Save yeiichi/f433e354704d7af3567de88567b7088b to your computer and use it in GitHub Desktop.
Get properties from an excel file.
#!/usr/bin/env python3
from datetime import datetime
from pathlib import Path
import pandas as pd
from natsort import natsorted
from openpyxl import load_workbook
CWD = Path('.')
def sft(dttm_str):
# Todo: Returns UTC time?
pattern = '%a, %Y-%m-%d %H:%M:%S'
return dttm_str.strftime(pattern) if dttm_str else None
def get_properties(xl_path):
"""Get properties from an excel file.
Args:
xl_path (pathlib.PosixPath):
Returns:
dict: properties of the excel file.
"""
prop = load_workbook(xl_path).properties
return dict(
filename=xl_path.name,
created_utc=sft(prop.created), # stf: strftime
modified_utc=sft(prop.modified),
revision=prop.revision,
version=prop.version,
last_modified_by=prop.lastModifiedBy,
last_printed_utc=sft(prop.lastPrinted),
subject=prop.subject,
creator=prop.creator,
description=prop.description,
keywords=prop.keywords,
category=prop.category,
content_status=prop.contentStatus,
identifier=prop.identifier,
language=prop.language)
if __name__ == '__main__':
xl_paths = natsorted(Path(
input('Target excel Directory? >> ')).glob('*.xlsx'))
dfs = []
for path_ in xl_paths:
dfs.append(pd.DataFrame([get_properties(path_)]))
# Save as a CSV file.
fname = f'xl_property_{datetime.now().strftime("%Y%m%d%H%M%S")}.csv'
fpath = CWD / fname
pd.concat(dfs).to_csv(fpath, index=False)
print(f'Saved: {fpath}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment