Last active
October 28, 2023 11:42
-
-
Save jakob-hede/7ab1c138e17fc2a8646ce8c0444fe0f6 to your computer and use it in GitHub Desktop.
challenge_duplicate_file_deletor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
from pathlib import Path | |
# Set dobug to True to also print the verbose debug output:
# dobug = True | |
dobug = False | |
class File:
    """One directory entry's name, split up so copies can be paired with originals.

    A name counts as a copy when it contains the ``' copy'`` marker and the
    marker is followed only by optional ``copy`` / ``<digits>`` words and an
    optional ``.extension`` -- e.g. ``index copy.html``, ``index copy 2.html``
    or ``abc copy 2``.  Names that merely contain the letters (such as
    ``photo copyright.txt``) are treated as ordinary files.
    """

    # Text that separates the original's stem from the copy decoration.
    COPY_MARKER = ' copy'

    def __init__(self, path: Path) -> None:
        """Derive all name parts from ``path.name``; the file itself is never read."""
        super().__init__()
        # Bare file name without any directory component.
        self.name = path.name
        # head:    text before the copy marker (the whole name for non-copies).
        # is_copy: True when a valid copy marker was found.
        self.head, tail, self.is_copy = self._split_copy_marker(self.name)
        # For a copy the extension is looked up in what follows the marker,
        # for everything else in the complete name.
        suffix_base = tail if self.is_copy else self.name
        _, dot, extension = suffix_base.rpartition('.')
        # Extension without the dot, or '' when there is no dot at all.
        self.suffix = extension if dot != '' else ''
        # Fix the problem that spaces are sorted before dots:
        self.sort_name = self.name.replace(' ', '‾')
        # Copies that have been attached to this file via add_duplicate().
        self.duplicates = []

    @classmethod
    def _split_copy_marker(cls, name):
        """Return ``(head, tail, is_copy)`` for ``name``.

        The first marker occurrence whose tail looks like a copy decoration
        wins.  The search starts at index 1 so that a name beginning with the
        marker (empty head) is not considered a copy of anything.
        """
        marker = cls.COPY_MARKER
        start = name.find(marker, 1)
        while start != -1:
            tail = name[start + len(marker):]
            if cls._is_copy_tail(tail):
                return name[:start], tail, True
            start = name.find(marker, start + 1)
        return name, '', False

    @staticmethod
    def _is_copy_tail(tail) -> bool:
        """Tell whether ``tail`` (the text after a marker) is a copy decoration.

        Accepted: nothing, ``.ext``, `` 2``, `` 2.ext``, `` copy 3.ext`` ...
        Rejected: anything glued to the marker such as ``right.txt`` (from
        "copyright") or words that are neither digits nor ``copy``.
        """
        counter = tail.partition('.')[0]
        if counter == '':
            return True
        if not counter.startswith(' '):
            return False
        return all(word.isdigit() or word == 'copy'
                   for word in counter[1:].split(' '))

    def __repr__(self):
        status = 'COPY' if self.is_copy else '....'
        return f'<{status} "{self.name}">'

    def __lt__(self, other):
        """Order by ``sort_name`` so ``a.txt`` sorts before ``a copy.txt``."""
        return self.sort_name < other.sort_name

    def matches(self, ori_item) -> bool:
        """Return True when this (copy) file plausibly duplicates ``ori_item``.

        Two conditions must hold:
        * the extensions agree, unless this file has no extension at all
          (an extension-less copy matches any extension);
        * ``ori_item.head`` is this file's head, optionally continued by an
          extension (``.``).  A bare prefix is not enough: ``ab copy`` must
          not be paired with ``abc.txt``.
        """
        if self.suffix != '' and ori_item.suffix != self.suffix:
            return False
        if not ori_item.head.startswith(self.head):
            return False
        remainder = ori_item.head[len(self.head):]
        return remainder == '' or remainder.startswith('.')

    def add_duplicate(self, duplicate):
        """Remember ``duplicate`` as a copy of this file."""
        self.duplicates.append(duplicate)
class Dupletor:
    """Report which files in a directory are "<name> copy..." duplicates.

    The class only classifies and prints; apart from generate_victims(),
    which creates empty sample files, nothing here touches the file system
    beyond listing a directory.
    """

    @classmethod
    def spawn(cls):
        """Announce start-up and run duplete() on a fresh instance.

        Returns:
            Whatever duplete() returns (True on success, False on failure).
        """
        cls.spout('spawn Dupletor', debuggy=False)
        # Enable to generate test victims directory and files:
        # cls().generate_victims()
        return cls().duplete()

    def duplete(self, directory=None):
        """Classify the files of a directory into originals and duplicates.

        Args:
            directory: Directory to scan.  When omitted, the first
                command-line argument (``sys.argv[1]``) is used.

        Returns:
            True when the directory was scanned and reported, False when the
            run stopped early (missing argument, path missing, path not a
            directory, or no files found).  An error line is printed in each
            failure case.
        """
        if directory is None:
            if len(sys.argv) < 2:
                self.error('Usage: python dupletor.py <path>')
                return False
            directory = sys.argv[1]
        path = Path(directory).resolve()
        if not path.exists():
            self.error(f'"{path}" does not exist')
            return False
        if not path.is_dir():
            self.error(f'"{path}" is not a directory')
            return False
        # Sub-directories are ignored; every other entry becomes a File.
        alls = [File(item) for item in path.iterdir() if not item.is_dir()]
        if not alls:
            self.error(f'"{path}" No files found.')
            return False
        self.announce(f'duplete "{path}"')
        ####
        alls.sort()
        self.dashout('alls', series=alls)
        originals = [entry for entry in alls if not entry.is_copy]
        rests = [entry for entry in alls if entry.is_copy]
        self.dashout('originals', series=originals)
        self.dashout('rest', series=rests)
        ####
        self.dashout('stuff')
        dupes = self._pair_copies(originals, rests)
        ####
        self.dashout('originals', series=originals, debuggy=False)
        self.dashout('dupes', series=dupes, debuggy=False)
        self.show_relations(originals)
        self.announce('Do some actual logistics ...')
        return True

    def _pair_copies(self, originals, rests):
        """Attach every copy in ``rests`` to the first original it matches.

        A copy without a matching original is appended to ``originals``
        (mutating the caller's list), so later copies such as
        ``index copy 2.html`` can still be paired with ``index copy.html``.

        Returns:
            The list of copies that were attached to an original.
        """
        dupes = []
        for rest_item in rests:
            self.debug(f' - rest_item: {rest_item}')
            for ori_item in originals:
                if rest_item.matches(ori_item):
                    self.debug(f' -- ori: {ori_item}')
                    ori_item.add_duplicate(rest_item)
                    dupes.append(rest_item)
                    break
            else:
                # No original found: promote the copy to an original.
                originals.append(rest_item)
        return dupes

    def show_relations(self, originals):
        """Print each original, followed by its duplicates when it has any."""
        self.dashout('relations', debuggy=False)
        for original in originals:
            if original.duplicates:
                self.spout(f' - {original}', debuggy=False)
                for duplicate in original.duplicates:
                    self.debug(f'\t\t-- {duplicate}', debuggy=False)
            else:
                self.debug(f' - {original}', debuggy=False, color_num=34)

    # region utils

    @staticmethod
    def spout(*args, color_num=92, debuggy=True):
        """Print ``args`` joined by spaces, wrapped in an ANSI colour escape.

        Args:
            color_num: ANSI SGR colour code placed in the escape sequence.
            debuggy: When True the message is debug-only and is printed
                solely if the module-level ``dobug`` flag is truthy; when
                False the message is always printed.
        """
        if debuggy and not dobug:
            return
        # spout in glorious colors:
        print(f'\033[{color_num}m {" ".join(map(str, args))}\033[0m')

    def error(self, *args, color_num=91, debuggy=False):
        """Print an always-visible message in the error colour (91)."""
        self.spout(*args, color_num=color_num, debuggy=debuggy)

    def announce(self, *args, color_num=93):
        """Print an always-visible message in the announcement colour (93)."""
        self.spout(*args, color_num=color_num, debuggy=False)

    def debug(self, *args, color_num=90, debuggy=True):
        """Print a message in the debug colour (90); debug-only by default."""
        self.spout(*args, color_num=color_num, debuggy=debuggy)

    def dashout(self, arg, series=None, debuggy=True):
        """Print ``arg`` as a heading padded with dashes, then each item of ``series``.

        The dash run is sized so that "<arg>: " plus the dashes spans 80
        characters.
        """
        dash_ = '-' * (80 - len(arg) - 2)
        self.spout(f'{arg}: {dash_}', debuggy=debuggy)
        if series:
            for item in series:
                self.debug(f' - {item}', debuggy=debuggy)

    # endregion utils

    ############################################################

    def generate_victims(self):
        """Create an empty sample file for every non-blank line of generatext.

        The files are placed in a ``victims`` directory next to this script;
        the directory is created when missing.
        """
        txt = self.generatext
        self.debug(f' - txt: {txt}')
        destin_dir = Path(__file__).parent / 'victims'
        destin_dir.mkdir(parents=True, exist_ok=True)
        self.debug('-' * 80)
        for line in txt.splitlines():
            line = line.strip()
            if not line:
                continue
            self.debug(f' - line: "{line}"')
            (destin_dir / line).touch()

    @property
    def generatext(self) -> str:
        """Sample file names, one per line, consumed by generate_victims().

        Leading/trailing whitespace on each line is not significant because
        generate_victims() strips every line before use.
        """
        txt = """
        faang.png
        faang copy.png
        index copy.html
        index copy 2.html
        index copy 3.html
        package.abc
        package.json
        package copy.json
        package copy 2.json
        package copy 2.xyz
        README.md
        README copy.md
        Screenshot 2023-09-04 at 3.01.45 PM copy
        Screenshot 2023-09-04 at 3.01.45 PM copy 2
        suffixy 12.34
        suffixy 12.34 copy
        suffixy 12.34 copy 2
        suffixy 12.34 xyz
        suffixy 12.34 xyz copy
        suffixy 12.34 xyz copy 2
        abc
        abc copy
        abc copy 2
        def.aaa.bbb
        def.aaa.bbb.ccc
        """
        return txt
if __name__ == '__main__':
    # Report failure to the shell through the exit status.  Only an explicit
    # False from spawn() counts as failure, so a spawn() that returns nothing
    # still exits with status 0.
    sys.exit(1 if Dupletor.spawn() is False else 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
/usr/bin/python3 /opt/projects/challenge_duplicate_file_deletor/py/dupletor.py victims
spawn Dupletor
duplete "/opt/projects/challenge_duplicate_file_deletor/py/victims"
originals: ---------------------------------------------------------------------
dupes: -------------------------------------------------------------------------
Do some actual logistics ...
Process finished with exit code 0