Skip to content

Instantly share code, notes, and snippets.

@dmwyatt
Last active October 25, 2023 22:09
Show Gist options
  • Save dmwyatt/b468f17f2173ff3f2750 to your computer and use it in GitHub Desktop.
Save dmwyatt/b468f17f2173ff3f2750 to your computer and use it in GitHub Desktop.
[hash directory and capture filename data]
import os
import hashlib
def hash_dir(path):
    """
    Return a SHA-1 hex digest of a directory's file contents and file names.

    A hash of file contents alone does not notice a rename, so after the
    contents of every file have been fed to the hash, the path of every
    file is fed to it as well. The digest therefore changes when a file's
    contents change and when a file is renamed, added or removed.

    Note that the paths mixed into the hash are absolute (``path`` is passed
    through ``os.path.abspath``), so the same tree stored at a different
    location produces a different digest.

    :param path: directory to hash (``str`` or ``bytes``); it is walked
        recursively with ``os.walk``.
    :return: the hexadecimal SHA-1 digest as a string.
    :raises OSError: if a file reported by ``os.walk`` cannot be opened
        or read.
    """
    path = os.path.abspath(path)
    sha1_hash = hashlib.sha1()
    filepaths = []
    for root, dirs, files in os.walk(path):
        # os.walk reports entries in whatever order the filesystem returns
        # them. Sorting both lists in place fixes the traversal order (the
        # in-place sort of ``dirs`` steers the top-down walk), so the same
        # tree always feeds its contents to the hash in the same order.
        dirs.sort()
        files.sort()
        for name in files:
            filepath = os.path.join(root, name)
            filepaths.append(filepath)
            # Update our hash with data from this file, one chunk at a
            # time so large files are never loaded into memory whole.
            with open(filepath, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    sha1_hash.update(chunk)
    # Handle file renaming, which doesn't change file contents, by updating
    # our hash with the names of all the files.
    filepaths.sort()
    for filepath in filepaths:
        # hashlib only accepts bytes; os.fsencode converts str paths using
        # the filesystem encoding and passes bytes paths through unchanged.
        sha1_hash.update(os.fsencode(filepath))
    return sha1_hash.hexdigest()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment