Skip to content

Instantly share code, notes, and snippets.

@mh61503891
Created August 29, 2023 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mh61503891/faafb7c28e33eff28ceca528d91cc7c4 to your computer and use it in GitHub Desktop.
Save mh61503891/faafb7c28e33eff28ceca528d91cc7c4 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
# Usage:
# $ python3 this.py > output.csv
# $ nkf --overwrite --oc=UTF-8-BOM output.csv
import glob
import hashlib
import os
import sys
from openpyxl import load_workbook
import pandas as pd
def get_xlsx_properties(xlsx_path: str) -> list[object]:
    """Return one report row describing the workbook at *xlsx_path*.

    The row always has eight fields, in this order: path, creator,
    lastModifiedBy, created, modified, file size in bytes, SHA-256 hex
    digest of the file contents, and a status string.

    On success the status is ``"ok"``. If any step fails, the exception is
    not propagated: the row keeps the path, the six middle fields are
    ``None`` and the status is ``str(exception)``, so a caller processing
    many files can keep going.

    Args:
        xlsx_path: Path of the .xlsx file to inspect.

    Returns:
        An eight-element list of mixed types (strings, an int, ``None`` and
        the property values exactly as openpyxl exposes them), hence the
        ``list[object]`` annotation.
    """
    try:
        # Progress goes to stderr; the usage notes redirect stdout to a file.
        print(f"loading: {xlsx_path}", file=sys.stderr)
        with open(xlsx_path, "rb") as f:
            # Take size and digest from the same open handle so both values
            # describe the same file even if the path is replaced meanwhile.
            file_size = os.fstat(f.fileno()).st_size
            file_hash = hashlib.file_digest(f, "sha256").hexdigest()
        props = load_workbook(xlsx_path).properties
        return [
            xlsx_path,
            props.creator,
            props.lastModifiedBy,
            props.created,
            props.modified,
            file_size,
            file_hash,
            "ok",
        ]
    except Exception as e:
        # Deliberate catch-all: a failure is reported in the status field
        # of this file's row instead of being raised to the caller.
        return [xlsx_path, *([None] * 6), str(e)]
# Column headers, in the same order as the fields of each row returned by
# get_xlsx_properties.
_columns = [
    "prop_path",
    "prop_creator",
    "prop_lastModifiedBy",
    "prop_created",
    "prop_modified",
    "file_size",
    "file_hash",
    "status",
]

# One row per *.xlsx file found directly in the current working directory.
_rows = list(map(get_xlsx_properties, glob.glob("*.xlsx")))

df = pd.DataFrame(_rows, columns=_columns)

# Write the report as CSV to stdout with pandas' default to_csv options.
df.to_csv(path_or_buf=sys.stdout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment