Skip to content

Instantly share code, notes, and snippets.

@flying-sheep
Created December 19, 2017 14:00
Show Gist options
  • Save flying-sheep/b3a5b354c075cf041aee06041b6c84db to your computer and use it in GitHub Desktop.
Save flying-sheep/b3a5b354c075cf041aee06041b6c84db to your computer and use it in GitHub Desktop.
import sys
import hashlib
import pathlib
# Chunk size (64 KiB) for reading files, so hashing never loads a whole file
# into memory at once.
BUF_SIZE = 65536


def get_hash(fp):
    """Return the SHA-1 hex digest of the contents of the file at *fp*.

    The file is opened in binary mode and consumed in ``BUF_SIZE``-byte
    chunks, so arbitrarily large files can be hashed with constant memory.

    Args:
        fp: Anything accepted by ``open()`` (e.g. a ``str`` or a
            ``pathlib.Path``) that names a readable file.

    Returns:
        The 40-character lowercase hexadecimal SHA-1 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha1 = hashlib.sha1()
    with open(fp, 'rb') as f:
        # Two-argument iter() calls f.read(BUF_SIZE) repeatedly and stops as
        # soon as it returns b"" (end of file).
        for chunk in iter(lambda: f.read(BUF_SIZE), b""):
            sha1.update(chunk)
    return sha1.hexdigest()
def compare_two_datastores(path_ds1: pathlib.Path, path_ds2: pathlib.Path):
    """Return the names of files in *path_ds1* whose content differs in *path_ds2*.

    Both directory trees are walked recursively and every regular file whose
    name matches ``*.*`` is hashed with ``get_hash``. Files are matched
    between the two trees by bare file name.

    Args:
        path_ds1: Root directory of the first datastore.
        path_ds2: Root directory of the second datastore.

    Returns:
        A list of file names found under *path_ds1* whose digest differs from
        the same-named file under *path_ds2*. A file that has no counterpart
        under *path_ds2* is reported as differing. Files that exist only
        under *path_ds2* are not reported.

    Raises:
        AssertionError: If either argument is not a ``pathlib.Path``.
    """
    assert isinstance(path_ds1, pathlib.Path), "must be a pathlib path"
    assert isinstance(path_ds2, pathlib.Path), "must be a pathlib path"
    hashes_path_ds1 = _hash_datastore(path_ds1)
    hashes_path_ds2 = _hash_datastore(path_ds2)
    # .get() yields None for a file missing from the second tree; None never
    # equals a digest string, so such files count as differences instead of
    # raising KeyError.
    return [
        name
        for name, digest in hashes_path_ds1.items()
        if hashes_path_ds2.get(name) != digest
    ]


def _hash_datastore(root):
    """Map file name -> digest for every regular file matching ``*.*`` under *root*."""
    # NOTE(review): keys are bare file names, so two files with the same name
    # in different subdirectories overwrite each other -- confirm names are
    # unique within a datastore.
    # is_file() skips directories whose name contains a dot; rglob("*.*")
    # matches them too, and they cannot be opened for hashing.
    return {x.name: get_hash(x) for x in root.rglob("*.*") if x.is_file()}
if __name__ == "__main__":
    # Script entry point: compare the two hard-coded datastore directories
    # (relative to the current working directory).
    path_c7 = pathlib.Path("./c7ccabbd2bc1451cab75963e658de169/")
    path_ff = pathlib.Path("./ff47b463f36740f6aec14ce38c4e568b/")
    # Print one differing file name per line; previously the result was
    # computed and then discarded, so the script produced no output.
    for name in compare_two_datastores(path_c7, path_ff):
        print(name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment