Last active
July 15, 2024 01:42
-
-
Save yeiichi/f433e354704d7af3567de88567b7088b to your computer and use it in GitHub Desktop.
Get properties from an excel file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from datetime import datetime | |
from pathlib import Path | |
import pandas as pd | |
from natsort import natsorted | |
from openpyxl import load_workbook | |
CWD = Path('.') | |
def sft(dttm_str): | |
# Todo: Returns UTC time? | |
pattern = '%a, %Y-%m-%d %H:%M:%S' | |
return dttm_str.strftime(pattern) if dttm_str else None | |
def get_properties(xl_path): | |
"""Get properties from an excel file. | |
Args: | |
xl_path (pathlib.PosixPath): | |
Returns: | |
dict: properties of the excel file. | |
""" | |
prop = load_workbook(xl_path).properties | |
return dict( | |
filename=xl_path.name, | |
created_utc=sft(prop.created), # stf: strftime | |
modified_utc=sft(prop.modified), | |
revision=prop.revision, | |
version=prop.version, | |
last_modified_by=prop.lastModifiedBy, | |
last_printed_utc=sft(prop.lastPrinted), | |
subject=prop.subject, | |
creator=prop.creator, | |
description=prop.description, | |
keywords=prop.keywords, | |
category=prop.category, | |
content_status=prop.contentStatus, | |
identifier=prop.identifier, | |
language=prop.language) | |
if __name__ == '__main__': | |
xl_paths = natsorted(Path( | |
input('Target excel Directory? >> ')).glob('*.xlsx')) | |
dfs = [] | |
for path_ in xl_paths: | |
dfs.append(pd.DataFrame([get_properties(path_)])) | |
# Save as a CSV file. | |
fname = f'xl_property_{datetime.now().strftime("%Y%m%d%H%M%S")}.csv' | |
fpath = CWD / fname | |
pd.concat(dfs).to_csv(fpath, index=False) | |
print(f'Saved: {fpath}') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment