Skip to content

Instantly share code, notes, and snippets.

@slavanap
Last active January 27, 2020 23:02
Show Gist options
  • Save slavanap/7cc8d04cc742388cb6eb9b0389f6feb4 to your computer and use it in GitHub Desktop.
Save slavanap/7cc8d04cc742388cb6eb9b0389f6feb4 to your computer and use it in GitHub Desktop.
Organize files while changing folder structure
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import os
import os.path
import pickle
import sys
# Chunk size, in bytes, used when streaming file contents (16 MiB).
BUF_SIZE = 16 * 1024 * 1024


def get_hash(fn):
    """Return a fingerprint string for the file at path ``fn``.

    The file is opened in binary mode and consumed in ``BUF_SIZE``-byte
    chunks, so it is never held in memory as a whole.

    The result has the form
    ``"<bytes read>:<sha256 hex digest>:<digest_size>:<block_size>"``.
    """
    hasher = hashlib.sha256()
    total_bytes = 0
    with open(fn, 'rb') as stream:
        # read() returns b'' at end of file, which is the sentinel that
        # terminates the iterator.
        for chunk in iter(lambda: stream.read(BUF_SIZE), b''):
            total_bytes += len(chunk)
            hasher.update(chunk)
    return "{}:{}:{}:{}".format(
        total_bytes,
        hasher.hexdigest(),
        hasher.digest_size,
        hasher.block_size,
    )
def equal_files(filename1, filename2):
    """Return True when the two files hold identical bytes, else False.

    The sizes reported by ``os.path.getsize`` are compared first; only when
    they match are both files read side by side in ``BUF_SIZE``-byte chunks.
    """
    size_a = os.path.getsize(filename1)
    size_b = os.path.getsize(filename2)
    if size_a != size_b:
        return False

    with open(filename1, 'rb') as left, open(filename2, 'rb') as right:
        chunk_a = left.read(BUF_SIZE)
        chunk_b = right.read(BUF_SIZE)
        while chunk_a == chunk_b:
            if not chunk_a:
                # Both streams returned b'' together: end of file reached
                # without finding a difference.
                return True
            chunk_a = left.read(BUF_SIZE)
            chunk_b = right.read(BUF_SIZE)
        return False
def _resolve(path):
    """Return ``path`` as a case-normalised real path with symlinks resolved."""
    return os.path.normcase(os.path.realpath(path))


def _is_inside(path, resolved_root):
    """Return True if ``path`` resolves to ``resolved_root`` or below it."""
    try:
        return os.path.commonpath([_resolve(path), resolved_root]) == resolved_root
    except ValueError:
        # commonpath() raises when the paths cannot share a prefix,
        # e.g. different drives on Windows.
        return False


def _build_keep_index(keepPath):
    """Map the get_hash() fingerprint of each file under ``keepPath`` to its path."""
    index = {}
    for dirpath, _dirnames, filenames in os.walk(keepPath):
        for filename in filenames:
            fn = os.path.join(dirpath, filename)
            fingerprint = get_hash(fn)
            known_fn = index.get(fingerprint)
            if known_fn is None:
                index[fingerprint] = fn
            elif os.path.getsize(fn) > 0 and not equal_files(fn, known_fn):
                # Same fingerprint, different bytes: report it; the first
                # path seen stays in the index.
                print("KEEP COLLISION: '{}' and '{}'".format(fn, known_fn))
    return index


def _remove_empty_dirs(root, protected_root):
    """Remove empty directories under ``root`` (and ``root`` itself), deepest first."""
    # Bottom-up order means a parent is visited after its children, so a
    # directory that only contained now-removed empty children is removed too.
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        if _is_inside(dirpath, protected_root):
            continue
        # The walk's listing was taken before children were removed, so
        # re-check the directory's current contents.
        if not os.listdir(dirpath):
            os.rmdir(dirpath)


def main(keepPath, clearPath, *, rebuild_index=True):
    """Delete files under ``clearPath`` whose content also exists under ``keepPath``.

    Every file under ``keepPath`` is fingerprinted with ``get_hash`` and the
    resulting ``{fingerprint: path}`` index is pickled to ``keep.p`` in the
    current working directory. Each file under ``clearPath`` whose fingerprint
    is in the index is then compared with the indexed file via ``equal_files``
    (zero-length files skip that comparison) and, if identical, reported with a
    ``REMOVE:`` line and deleted. Finally, empty directories under
    ``clearPath`` (including ``clearPath`` itself) are removed.

    Directories that resolve to a location inside ``keepPath`` are never
    touched. Without this guard, overlapping trees would make every kept file
    match its own index entry and be deleted.

    Args:
        keepPath: Root of the tree whose files must be preserved.
        clearPath: Root of the tree to purge of duplicates.
        rebuild_index: When True (the default), scan ``keepPath`` and rewrite
            ``keep.p``. When False, load the index from an existing ``keep.p``
            instead of scanning.
    """
    if rebuild_index:
        keep = _build_keep_index(keepPath)
        with open("keep.p", "wb") as f:
            pickle.dump(keep, f)
    else:
        # NOTE(security): pickle.load() can execute arbitrary code. Only use
        # this path with a keep.p that this script wrote itself.
        with open("keep.p", "rb") as f:
            keep = pickle.load(f)

    keep_root = _resolve(keepPath)
    for dirpath, dirnames, filenames in os.walk(clearPath):
        if _is_inside(dirpath, keep_root):
            print("SKIP (inside keep path): {}".format(dirpath))
            # Prune in place so os.walk does not descend into the keep tree.
            dirnames[:] = []
            continue
        for filename in filenames:
            fn = os.path.join(dirpath, filename)
            keep_fn = keep.get(get_hash(fn))
            if keep_fn is None:
                # No file with this content is kept; leave it alone.
                continue
            if os.path.getsize(fn) > 0 and not equal_files(fn, keep_fn):
                print("COLLISION: '{}' and '{}'".format(fn, keep_fn))
                continue
            print("REMOVE: {}".format(fn))
            os.remove(fn)

    _remove_empty_dirs(clearPath, keep_root)
if __name__ == "__main__":
    # Require exactly two paths. A missing argument used to crash with a bare
    # IndexError, and a surplus argument usually means an unquoted path
    # containing spaces, which is dangerous input for a tool that deletes files.
    if len(sys.argv) != 3:
        sys.exit("usage: {} KEEP_PATH CLEAR_PATH".format(sys.argv[0]))
    main(keepPath=sys.argv[1], clearPath=sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment