Skip to content

Instantly share code, notes, and snippets.

@IvanBayan
Created October 3, 2018 06:57
Show Gist options
  • Save IvanBayan/2dd9af4439764df6061eebdce09b7db2 to your computer and use it in GitHub Desktop.
Save IvanBayan/2dd9af4439764df6061eebdce09b7db2 to your computer and use it in GitHub Desktop.
Clean up a directory: delete every file whose hash duplicates an earlier file (keeping the first one found), or, when a regexp is given, delete only those same-hash files whose names match the regexp.
import hashlib
import os
import argparse
import re
def make_hash(filename, chunk_size=6553600):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is opened in binary mode and read in fixed-size chunks, so a
    large file never has to be held in memory all at once.

    Args:
        filename: Path of the file to hash.
        chunk_size: Maximum number of bytes to read per call. Defaults to
            6553600, the value that was previously hard-coded.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If chunk_size is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # A zero-byte read returns b"" immediately, which would make every file
    # hash like an empty file and look like a duplicate of every other one.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")

    hasher = hashlib.md5()
    with open(filename, 'rb') as afile:
        while True:
            buf = afile.read(chunk_size)
            if not buf:
                # An empty read means end of file.
                break
            hasher.update(buf)
    return hasher.hexdigest()
if __name__ == '__main__':
    # Command-line entry point: hash every file under --dir (recursively),
    # group files by digest, and delete duplicates.
    #   * Without --regexp: for each group of identical files, keep the first
    #     one encountered by os.walk and delete the rest.
    #   * With --regexp: for each group of identical files, delete every file
    #     whose *name* (not full path) matches the regexp from its start.
    #     NOTE: if all files in a group match, all copies are deleted.
    hashes = {}    # digest -> full paths of files with that content
    todelete = []  # full paths scheduled for removal
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dir", required=True,
                    help="directory to clean up (recursively)")
    ap.add_argument("-r", "--regexp", required=False,
                    help="delete all files with same hash which matched regexp")
    args = vars(ap.parse_args())
    # Always bind r so later code can test it directly.
    r = re.compile(args['regexp']) if args['regexp'] is not None else None

    for directory, subdirs, files in os.walk(args['dir']):
        for ffile in files:
            # Join with the directory currently being walked, not the root,
            # so files inside subdirectories resolve to their real location.
            path = os.path.join(directory, ffile)
            print("hashing " + path)
            digest = make_hash(path)
            hashes.setdefault(digest, []).append(path)

    for paths in hashes.values():
        if len(paths) == 1:
            # Unique content: nothing to clean up.
            continue
        if r is not None:
            # Match against the bare file name, as the regexp option always
            # did, even though full paths are stored now.
            todelete.extend(
                p for p in paths if r.match(os.path.basename(p)))
        else:
            todelete.extend(paths[1:])

    for f in todelete:
        print("Deleting {}".format(f))
        os.remove(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment