Skip to content

Instantly share code, notes, and snippets.

@pavelch
Last active June 14, 2018 10:04
Show Gist options
  • Save pavelch/9ea30c40c4529fecd8bc8bd59ff504cb to your computer and use it in GitHub Desktop.
Save pavelch/9ea30c40c4529fecd8bc8bd59ff504cb to your computer and use it in GitHub Desktop.
List a directory and print the names of duplicated files
#!/usr/bin/env python
"""List directory, print duplicated files names
"""
from __future__ import print_function
import os
import argparse
import hashlib
import sys
def _md5_of_file(filepath, blocksize=65536):
    """Return the hex MD5 digest of the file at *filepath*.

    The file is read in chunks of *blocksize* bytes so that large files are
    never loaded into memory in one piece.
    """
    hasher = hashlib.md5()
    with open(filepath, 'rb') as afile:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes object, i.e. until end of file.
        for buf in iter(lambda: afile.read(blocksize), b''):
            hasher.update(buf)
    return hasher.hexdigest()


def main(args):
    """Print the names of files in ``args.path`` that have identical content.

    Only regular files directly inside the directory are examined (the
    listing is not recursive). Files are grouped by the MD5 digest of their
    content; MD5 is used here purely as a content fingerprint.

    Output format: for every group with more than one file, the names are
    printed two per line (space separated), followed by a ``#####`` line.
    Names are processed in sorted order so the output is reproducible.

    Args:
        args: object with a ``path`` attribute naming the directory to scan
            (normally the ``argparse`` namespace).

    Raises:
        Exception: if ``args.path`` is not a directory.
    """
    if not os.path.isdir(args.path):
        raise Exception('Not a directory')

    # Map content digest -> list of file names sharing that content.
    hashes = {}
    # os.listdir() returns names in arbitrary order; sort for stable output.
    for name in sorted(os.listdir(args.path)):
        filepath = os.path.join(args.path, name)
        if os.path.isfile(filepath):
            hashes.setdefault(_md5_of_file(filepath), []).append(name)

    # Collect output fragments and join once instead of repeated string
    # concatenation.
    pieces = []
    for names in hashes.values():
        if len(names) > 1:
            for index, name in enumerate(names):
                pieces.append(name)
                # Even positions are followed by a space, odd ones by a
                # newline, which lays the names out two per line.
                pieces.append(' ' if index % 2 == 0 else '\n')
            pieces.append('\n#####\n')
    print(''.join(pieces))
if __name__ == '__main__':
    # Command-line entry point: parse the directory path and report duplicates.
    parser = argparse.ArgumentParser(description=__doc__)
    # The path is mandatory: without it there is nothing to scan, so let
    # argparse report the missing option with a usage message.
    parser.add_argument('-p', '--path', dest='path', type=str, required=True,
                        help='Path look for duplicates')
    # Parse outside the try block: argparse handles -h and usage errors itself
    # by raising SystemExit, which must not be intercepted.
    args = parser.parse_args()
    try:
        main(args)
    except Exception as err:
        # Show the real failure on stderr and exit with a non-zero status
        # instead of hiding it behind the help text and exit code 0.
        sys.exit('error: {}'.format(err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment