Skip to content

Instantly share code, notes, and snippets.

@tokejepsen
Last active January 21, 2021 09:35
Show Gist options
  • Save tokejepsen/902b7adff95c5943ce702c2a32d66ee9 to your computer and use it in GitHub Desktop.
Save tokejepsen/902b7adff95c5943ce702c2a32d66ee9 to your computer and use it in GitHub Desktop.
Google Drive Duplicates
import os
import re
import sys
import shutil
def get_size(start_path):
    """Return the combined size in bytes of everything at ``start_path``.

    If ``start_path`` is a directory, the sizes of all files found by
    walking it are added up. If it is a regular file, its own size is
    returned. Symbolic links are never counted. A path that is neither a
    directory nor a file yields 0.
    """
    # Sum every non-symlink file found while walking the tree. For a plain
    # file (or a missing path) os.walk yields nothing, so this stays 0.
    walked_bytes = sum(
        os.path.getsize(candidate)
        for folder, _subfolders, names in os.walk(start_path)
        for candidate in (os.path.join(folder, name) for name in names)
        if not os.path.islink(candidate)
    )

    # A regular file passed directly is measured on its own.
    is_plain_file = os.path.isfile(start_path) and not os.path.islink(start_path)
    own_bytes = os.path.getsize(start_path) if is_plain_file else 0

    return walked_bytes + own_bytes
def sizeof_fmt(num, suffix='B'):
    """Format ``num`` as a human-readable size using binary (1024) prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or
    the prefixes up to 'Zi' are exhausted, in which case 'Yi' is used.
    ``suffix`` is appended after the prefix (e.g. 'B' gives 'KiB').
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    position = 0
    while position < len(prefixes):
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, prefixes[position], suffix)
        # Too large for this prefix: scale down and try the next one.
        num /= 1024.0
        position += 1
    # Ran past 'Zi'; whatever remains is expressed in 'Yi'.
    return "%.1f%s%s" % (num, 'Yi', suffix)
def clean_up(path):
    """Find and delete duplicate-looking files and directories under ``path``.

    An entry is treated as a duplicate when its name contains one of:

    * a timestamp such as ``2021-01-21T093500.123456Z``,
    * a timestamp such as ``2021-01-21T09 35 00.123Z``,
    * a parenthesised number such as ``(1)``.

    Files ending in ``.tmp`` are also deleted. While scanning, the running
    total of bytes to be freed is printed each time it changes after a
    directory has been processed. Deletion happens only after the whole
    tree has been scanned; entries that cannot be removed are reported and
    skipped.

    Args:
        path: Root directory to scan.
    """
    # NOTE(review): the last pattern also matches empty parentheses "()"
    # and any parenthesised number such as "(2020)" -- confirm that this
    # breadth is intended before running on valuable data.
    patterns = [
        r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{6}\.[0-9]{6}Z",
        r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2} [0-9]{2} [0-9]{2}\.[0-9]{3}Z",
        r"\([0-9]*\)"
    ]
    compiled_patterns = [re.compile(pattern) for pattern in patterns]

    def _looks_like_duplicate(name):
        # True when any duplicate-marker pattern occurs anywhere in the name.
        return any(regex.search(name) for regex in compiled_patterns)

    duplicate_dirs = []
    duplicate_files = []
    size = 0
    last_size = 0

    for root, dirs, files in os.walk(path):
        kept_dirs = []
        for dir_name in dirs:
            if _looks_like_duplicate(dir_name):
                target = os.path.join(root, dir_name)
                duplicate_dirs.append(target)
                size += get_size(target)
            else:
                kept_dirs.append(dir_name)
        # Prune in place so os.walk does not descend into directories that
        # are already queued for deletion. Descending would count their
        # contents a second time and queue nested matches that no longer
        # exist once the parent has been removed.
        dirs[:] = kept_dirs

        for file_name in files:
            if _looks_like_duplicate(file_name) or file_name.endswith(".tmp"):
                target = os.path.join(root, file_name)
                duplicate_files.append(target)
                size += get_size(target)

        # Report progress only when this directory added something.
        if last_size != size:
            print(sizeof_fmt(size))
            last_size = size

    for target in duplicate_files:
        try:
            os.remove(target)
        except OSError:
            print(f"Could not remove {target}.")

    for target in duplicate_dirs:
        try:
            shutil.rmtree(target)
        except OSError:
            print(f"Could not remove {target}.")
if __name__ == "__main__":
    # The directory to clean is the first command-line argument. Exit with
    # a usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <directory>")
    clean_up(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment