Last active
October 25, 2023 22:09
-
-
Save dmwyatt/b468f17f2173ff3f2750 to your computer and use it in GitHub Desktop.
[hash directory and capture filename data]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import os
def _file_digest(filepath, chunk_size=65536):
    """Return the raw SHA-1 digest (bytes) of the contents of one file."""
    file_hash = hashlib.sha1()
    with open(filepath, "rb") as f:
        # Read in fixed-size chunks so large files never sit fully in memory.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            file_hash.update(chunk)
    return file_hash.digest()


def hash_dir(path):
    """
    Return a SHA-1 hex digest covering the file names and contents of a tree.

    Every regular file found by ``os.walk`` below ``path`` contributes its
    path (relative to ``path``) and a digest of its contents, so the result
    changes when a file is renamed, moved within the tree, or edited.

    The digest is independent of:

    * where the directory itself lives (only relative paths are hashed), and
    * the order in which the filesystem happens to list entries (paths are
      sorted before hashing).

    Directories that contain no files do not influence the result, and a
    directory without any files hashes to the SHA-1 of the empty string.

    NOTE: the hashing scheme (relative names, per-file digests) differs from
    the original "all contents, then all absolute names" scheme, so digests
    are not comparable with values computed by that older scheme.

    :param path: directory to hash; may be relative or absolute.
    :returns: 40-character lowercase hexadecimal SHA-1 digest.
    :raises OSError: if a file inside the tree cannot be opened or read.
    """
    path = os.path.abspath(path)

    # Gather paths relative to the root so the digest does not depend on the
    # absolute location of the directory.
    relpaths = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            relpaths.append(os.path.relpath(os.path.join(root, name), path))

    # os.walk yields entries in filesystem order; sort for a stable digest.
    relpaths.sort()

    sha1_hash = hashlib.sha1()
    for relpath in relpaths:
        name_bytes = relpath
        if not isinstance(name_bytes, bytes):
            # hashlib only accepts bytes. surrogateescape round-trips file
            # names that are not valid in the filesystem encoding.
            name_bytes = name_bytes.encode("utf-8", "surrogateescape")

        # Name, NUL terminator, then a fixed-size content digest: this keeps
        # record boundaries unambiguous, so shifting bytes from one file to
        # the next (or from a name into contents) cannot produce a collision
        # by simple concatenation.
        sha1_hash.update(name_bytes)
        sha1_hash.update(b"\0")
        sha1_hash.update(_file_digest(os.path.join(path, relpath)))

    return sha1_hash.hexdigest()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment