Skip to content

Instantly share code, notes, and snippets.

@velenux
Created July 23, 2014 19:51
Show Gist options
  • Save velenux/afed07d4d1a559d58309 to your computer and use it in GitHub Desktop.
Save velenux/afed07d4d1a559d58309 to your computer and use it in GitHub Desktop.
Recursively calculate a "directory md5sum" based on the file name, size and mtime of its contents
import os
import hashlib
# calc directory checksum
def calc_dir_sum(path):
    """Return an MD5 hex digest summarising the directory tree at *path*.

    The digest of a directory is built from, in order:

    * one ``"<name>/<size>/<mtime>"`` record for every non-symlink file
      directly inside it (file contents are never read), then
    * the digest of every non-symlink sub-directory, computed recursively.

    Sub-directory names themselves are not fed into the hash, only their
    digests are. Entries are processed in sorted name order so the result
    does not depend on the order in which the filesystem lists them.

    As a side effect, one ``"<digest> <directory>/"`` line is printed for
    every directory visited, children before their parent.

    Args:
        path: Directory to checksum.

    Returns:
        The hexadecimal MD5 digest as a string, or ``None`` when
        ``os.walk()`` yields nothing for *path* (e.g. it does not exist).

    Raises:
        OSError: If ``os.stat()`` fails on a file found in the directory.
    """
    # os.walk() is only used to list a single level: the function returns
    # from inside the first iteration and descends by calling itself.
    for root, dirs, files in os.walk(path):
        dir_hash = hashlib.md5()

        # One record per regular file, based on name, size and mtime.
        for name in sorted(files):
            full_name = os.path.join(root, name)
            if os.path.islink(full_name):
                continue
            info = os.stat(full_name)
            record = "{0}/{1}/{2}".format(name, info.st_size, info.st_mtime)
            dir_hash.update(_to_bytes(record))

        # Fold in the digest of each real sub-directory.
        for name in sorted(dirs):
            full_name = os.path.join(root, name)
            if os.path.islink(full_name):
                continue
            child_sum = calc_dir_sum(full_name)
            if child_sum is None:
                # os.walk() silently yields nothing for a directory it
                # cannot list; skip it instead of crashing in update().
                continue
            dir_hash.update(_to_bytes(child_sum))

        digest = dir_hash.hexdigest()
        print("{0} {1}".format(digest, root + '/'))
        return digest

    return None


def _to_bytes(text):
    """Return *text* as a byte string suitable for ``hashlib`` update()."""
    if isinstance(text, bytes):
        # On Python 2 a plain ``str`` already is a byte string.
        return text
    # surrogateescape keeps file names that are not valid UTF-8 encodable.
    return text.encode("utf-8", "surrogateescape")
# Script entry: checksum the tree rooted at the placeholder path below
# (replace it with the directory you want to summarise).
calc_dir_sum(path="/your/data/path/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment