Last active
January 21, 2021 09:35
-
-
Save tokejepsen/902b7adff95c5943ce702c2a32d66ee9 to your computer and use it in GitHub Desktop.
Google Drive Duplicates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sys | |
import shutil | |
def get_size(start_path):
    """Return the combined size in bytes of everything under ``start_path``.

    Directories are walked recursively and the sizes of all contained files
    are added up; symbolic links to files are not counted. When
    ``start_path`` is itself a regular (non-symlink) file, its own size is
    returned. A path that is neither yields ``0``.
    """
    byte_count = 0

    for folder, _subfolders, names in os.walk(start_path):
        candidates = (os.path.join(folder, name) for name in names)
        # Symbolic links are skipped so only real file contents are counted.
        byte_count += sum(
            os.path.getsize(candidate)
            for candidate in candidates
            if not os.path.islink(candidate)
        )

    # os.walk() yields nothing for a plain file, so handle that case here.
    is_plain_file = (
        os.path.isfile(start_path) and not os.path.islink(start_path)
    )
    if is_plain_file:
        byte_count += os.path.getsize(start_path)

    return byte_count
def sizeof_fmt(num, suffix='B'):
    """Format ``num`` as a human-readable size using binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, and
    is then rendered with one decimal place followed by the matching prefix
    ('', 'Ki', 'Mi', ... 'Zi') and ``suffix``. Values too large for 'Zi'
    are reported in 'Yi'.
    """
    binary_prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num

    for prefix in binary_prefixes:
        fits_in_unit = abs(scaled) < 1024.0
        if fits_in_unit:
            return "%3.1f%s%s" % (scaled, prefix, suffix)
        scaled = scaled / 1024.0

    # Every listed prefix was exhausted; fall back to the largest one.
    return "%.1f%s%s" % (scaled, 'Yi', suffix)
def clean_up(path):
    """Delete duplicate-looking files and directories found under ``path``.

    A name is treated as a duplicate when it contains one of the timestamp
    patterns below or a parenthesised number such as ``(1)``. Files ending
    in ``.tmp`` are removed as well. While scanning, the running total of
    bytes scheduled for deletion is printed whenever it grows. After the
    scan, files are removed first and then whole directories. Entries that
    cannot be removed are reported and skipped.

    Args:
        path: Root directory to scan recursively.
    """
    # Compiled once; every file and directory name is tested against these.
    # NOTE(review): the last pattern also matches an empty "()" because of
    # the "*" quantifier - confirm whether "+" was intended.
    patterns = [
        re.compile(r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{6}\.[0-9]{6}Z"),
        re.compile(
            r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2} [0-9]{2} [0-9]{2}\.[0-9]{3}Z"
        ),
        re.compile(r"\([0-9]*\)"),
    ]

    def is_duplicate(name):
        return any(pattern.search(name) for pattern in patterns)

    duplicate_dirs = []
    duplicate_files = []
    size = 0
    last_size = 0

    for root, dirs, files in os.walk(path):
        kept_dirs = []
        for dir_name in dirs:
            if is_duplicate(dir_name):
                dir_path = os.path.join(root, dir_name)
                duplicate_dirs.append(dir_path)
                size += get_size(dir_path)
            else:
                kept_dirs.append(dir_name)
        # Prune flagged directories in place so os.walk() does not descend
        # into them: their whole content is already counted and will be
        # removed by rmtree(). Descending would count matching files twice
        # and schedule nested duplicates that no longer exist by the time
        # they are deleted.
        dirs[:] = kept_dirs

        for file_name in files:
            if is_duplicate(file_name) or file_name.endswith(".tmp"):
                file_path = os.path.join(root, file_name)
                duplicate_files.append(file_path)
                size += get_size(file_path)

        # Progress output: only print when this directory added something.
        if last_size != size:
            print(sizeof_fmt(size))
            last_size = size

    # Best-effort deletion: report failures and carry on with the rest.
    for file_path in duplicate_files:
        try:
            os.remove(file_path)
        except OSError:
            print(f"Could not remove {file_path}.")

    for dir_path in duplicate_dirs:
        try:
            shutil.rmtree(dir_path)
        except OSError:
            print(f"Could not remove {dir_path}.")
if __name__ == "__main__":
    # Fail with a readable usage message instead of a bare IndexError when
    # the directory argument is missing. Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <directory>")
    clean_up(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment