Skip to content

Instantly share code, notes, and snippets.

@kevcooper
Created May 15, 2020 18:49
Show Gist options
  • Save kevcooper/6758cc58a83ed43b89c67e9d648a1a5b to your computer and use it in GitHub Desktop.
Save kevcooper/6758cc58a83ed43b89c67e9d648a1a5b to your computer and use it in GitHub Desktop.
import glob
import sys
import os
import hashlib
# Files are hashed in fixed-size pieces so memory use stays bounded no matter
# how large an individual file is.
_CHUNK_SIZE = 1 << 16


def _md5_of_file(file_path, chunk_size=_CHUNK_SIZE):
    """Return the hexadecimal MD5 digest of the contents of *file_path*.

    The file is opened in binary mode and fed to the hash ``chunk_size``
    bytes at a time. Raises ``OSError`` if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as my_file:
        # iter() with a sentinel stops at the empty bytes object read() returns at EOF.
        for chunk in iter(lambda: my_file.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(root_dir):
    """Return groups of files under *root_dir* that have identical contents.

    Walks *root_dir* recursively with ``os.walk`` and groups every file by the
    MD5 digest of its contents. Files that cannot be read are skipped with a
    note on stderr instead of aborting the scan.

    Returns a list of lists of paths; each inner list holds two or more paths
    whose contents hash to the same digest, in the order they were visited.
    """
    hash_map = {}
    for dirpath, _dirs, files in os.walk(root_dir):
        for f in files:
            file_path = os.path.join(dirpath, f)
            try:
                md5 = _md5_of_file(file_path)
            except OSError as err:
                # Best-effort scan: unreadable entries (permissions, broken
                # symlinks, files removed mid-walk) are reported and skipped.
                print('skipping {}: {}'.format(file_path, err), file=sys.stderr)
                continue
            hash_map.setdefault(md5, []).append(file_path)
    return [paths for paths in hash_map.values() if len(paths) > 1]


def main(argv=None):
    """Command-line entry point: print one comma-separated line per duplicate group.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the directory to scan.
    Returns the process exit status: 0 on success, 2 when the directory
    argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('usage: {} ROOT_DIR'.format(argv[0] if argv else 'dupes'),
              file=sys.stderr)
        return 2
    for d in find_duplicates(argv[1]):
        print(','.join(d))
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment