Skip to content

Instantly share code, notes, and snippets.

@jakob-hede
Last active October 28, 2023 11:42
Show Gist options
  • Save jakob-hede/7ab1c138e17fc2a8646ce8c0444fe0f6 to your computer and use it in GitHub Desktop.
Save jakob-hede/7ab1c138e17fc2a8646ce8c0444fe0f6 to your computer and use it in GitHub Desktop.
challenge_duplicate_file_deletor
#!/usr/bin/env python3
import re
import sys
from pathlib import Path
# Debug switch read by Dupletor.spout(): when True, every message is printed,
# including those flagged as debug-only (debuggy=True). Set to True for noisy
# output while developing.
dobug = False
class File:
    """A directory entry described purely by its file name.

    The name is analysed for a Finder-style copy marker (``name copy.ext``,
    ``name copy 2.ext``) so that copies can be paired with their originals.

    Attributes:
        name: The bare file name (no directory part).
        head: Text before the copy marker; the whole name for non-copies.
        is_copy: True when the name carries a copy marker.
        suffix: Text after the last dot of the part following the marker
            (copies) or of the whole name (non-copies); '' if there is no dot.
        base_name: The name with the copy marker and counter removed, i.e.
            the name the original file is expected to have.
        sort_name: Sort key derived from ``name`` (see ``__init__``).
        duplicates: Files registered as copies of this one.
    """

    # ' copy' only counts as a marker when it ends the name or is followed by
    # a space (' copy 2') or a dot (extension). This keeps names such as
    # 'my copybook.txt' from being mistaken for copies.
    _COPY_MARKER = re.compile(r' copy(?=[ .]|$)')

    def __init__(self, path: Path) -> None:
        """Analyse ``path.name``; the file system is never touched."""
        super().__init__()
        self.name = path.name

        marker = self._COPY_MARKER.search(self.name)
        self.is_copy = marker is not None
        if marker is None:
            self.head = self.name
            suffix_base = self.name
            self.base_name = self.name
        else:
            self.head = self.name[:marker.start()]
            tail = self.name[marker.end():]
            suffix_base = tail
            # Everything from the first dot of the tail is the extension;
            # what precedes it (e.g. ' 2') is only the copy counter.
            dot_index = tail.find('.')
            extension = tail[dot_index:] if dot_index != -1 else ''
            self.base_name = self.head + extension

        _, sep, last_part = suffix_base.rpartition('.')
        self.suffix = last_part if sep != '' else ''

        # Fix the problem that spaces are sorted before dots: with the space
        # replaced by a character that sorts after ASCII, 'x.ext' comes before
        # 'x copy.ext', which in turn comes before 'x copy 2.ext'.
        self.sort_name = self.name.replace(' ', '‾')
        self.duplicates = []

    def __repr__(self):
        status = 'COPY' if self.is_copy else '....'
        return f'<{status} "{self.name}">'

    def __lt__(self, other):
        """Order files by ``sort_name`` so that ``list.sort()`` works."""
        if not isinstance(other, File):
            return NotImplemented
        return self.sort_name < other.sort_name

    def matches(self, ori_item):
        """Return True if this file is a copy of ``ori_item``.

        Both names must reduce to exactly the same ``base_name``. A plain
        prefix test would pair 'package copy.json' with 'package-lock.json'.
        ``ori_item`` may itself be a copy (an orphan promoted to original).
        """
        return ori_item.base_name == self.base_name

    def add_duplicate(self, duplicate):
        """Register ``duplicate`` as a copy of this file."""
        self.duplicates.append(duplicate)
class Dupletor:
    """Command-line helper that pairs Finder-style copies with their originals.

    The directory given as the first command-line argument is scanned
    (non-recursively), every copy is attached to the original it matches, and
    the result is printed. Nothing is deleted or moved by this class.
    """

    # File names created by generate_victims() as a test fixture.
    _VICTIM_NAMES = (
        'faang.png',
        'faang copy.png',
        'index copy.html',
        'index copy 2.html',
        'index copy 3.html',
        'package.abc',
        'package.json',
        'package copy.json',
        'package copy 2.json',
        'package copy 2.xyz',
        'README.md',
        'README copy.md',
        'Screenshot 2023-09-04 at 3.01.45 PM copy',
        'Screenshot 2023-09-04 at 3.01.45 PM copy 2',
        'suffixy 12.34',
        'suffixy 12.34 copy',
        'suffixy 12.34 copy 2',
        'suffixy 12.34 xyz',
        'suffixy 12.34 xyz copy',
        'suffixy 12.34 xyz copy 2',
        'abc',
        'abc copy',
        'abc copy 2',
        'def.aaa.bbb',
        'def.aaa.bbb.ccc',
    )

    @classmethod
    def spawn(cls):
        """Script entry point: announce start-up and run duplete()."""
        cls.spout('spawn Dupletor', debuggy=False)
        # To generate the test victims directory and files, call
        # cls().generate_victims() here.
        cls().duplete()

    def duplete(self):
        """Scan the directory named in sys.argv[1] and report its duplicates.

        Prints an error and returns early when the argument is missing, is
        not an existing directory, or the directory contains no files.
        """
        path = self._target_directory()
        if path is None:
            return

        alls = [File(item) for item in path.iterdir() if not item.is_dir()]
        if not alls:
            self.error(f'"{path}" No files found.')
            return
        self.announce(f'duplete "{path}"')

        alls.sort()
        self.dashout('alls', series=alls)

        originals, rests = self._split_copies(alls)
        self.dashout('originals', series=originals)
        self.dashout('rest', series=rests)

        self.dashout('stuff')
        dupes = self._attach_duplicates(originals, rests)

        self.dashout('originals', series=originals, debuggy=False)
        self.dashout('dupes', series=dupes, debuggy=False)
        self.show_relations(originals)
        self.announce('Do some actual logistics ...')

    def _target_directory(self):
        """Return the resolved directory from sys.argv[1], or None after printing an error."""
        if len(sys.argv) < 2:
            self.error('Usage: python dupletor.py <path>')
            return None
        path = Path(sys.argv[1]).resolve()
        if not path.exists():
            self.error(f'"{path}" does not exist')
            return None
        if not path.is_dir():
            self.error(f'"{path}" is not a directory')
            return None
        return path

    @staticmethod
    def _split_copies(files):
        """Split ``files`` into (non-copies, copies), keeping their order."""
        originals = [item for item in files if not item.is_copy]
        rests = [item for item in files if item.is_copy]
        return originals, rests

    def _attach_duplicates(self, originals, rests):
        """Attach each copy to the first original it matches; return the attached copies.

        A copy that matches nothing is appended to ``originals`` (mutated in
        place), so later copies of the same file can attach to it.
        """
        dupes = []
        for rest_item in rests:
            self.debug(f' - rest_item: {rest_item}')
            for ori_item in originals:
                if rest_item.matches(ori_item):
                    self.debug(f' -- ori: {ori_item}')
                    ori_item.add_duplicate(rest_item)
                    dupes.append(rest_item)
                    break
            else:
                # No original found: this copy is promoted to an original.
                originals.append(rest_item)
        return dupes

    def show_relations(self, originals):
        """Print every original, followed by the copies attached to it."""
        self.dashout('relations', debuggy=False)
        for original in originals:
            if original.duplicates:
                self.spout(f' - {original}', debuggy=False)
                for duplicate in original.duplicates:
                    self.debug(f'\t\t-- {duplicate}', debuggy=False)
            else:
                self.debug(f' - {original}', debuggy=False, color_num=34)

    # region utils
    @staticmethod
    def spout(*args, color_num=92, debuggy=True):
        """Print ``args`` space-joined, wrapped in the ANSI colour ``color_num``.

        Messages with ``debuggy=True`` are debug-only: they are suppressed
        unless the module-level ``dobug`` switch is on.
        """
        if debuggy and not dobug:
            return
        text = ' '.join(map(str, args))
        # spout in glorious colors:
        print(f'\033[{color_num}m {text}\033[0m')

    def error(self, *args, color_num=91, debuggy=False):
        """Print an error message (always shown by default)."""
        self.spout(*args, color_num=color_num, debuggy=debuggy)

    def announce(self, *args, color_num=93):
        """Print a message that is always shown."""
        self.spout(*args, color_num=color_num, debuggy=False)

    def debug(self, *args, color_num=90, debuggy=True):
        """Print a message that is debug-only by default."""
        self.spout(*args, color_num=color_num, debuggy=debuggy)

    def dashout(self, arg, series=None, debuggy=True):
        """Print ``arg`` as a dash-padded header, then each item of ``series``."""
        dash_ = '-' * (80 - len(arg) - 2)
        self.spout(f'{arg}: {dash_}', debuggy=debuggy)
        if series:
            for item in series:
                self.debug(f' - {item}', debuggy=debuggy)
    # endregion utils

    ############################################################
    def generate_victims(self):
        """Create empty test files in a 'victims' directory next to this script."""
        txt = self.generatext
        self.debug(f' - txt: {txt}')
        destin_dir = Path(__file__).parent / 'victims'
        destin_dir.mkdir(parents=True, exist_ok=True)
        self.debug('-' * 80)
        for line in txt.splitlines():
            line = line.strip()
            if not line:
                continue
            self.debug(f' - line: "{line}"')
            (destin_dir / line).touch()

    @property
    def generatext(self) -> str:
        """Newline-separated fixture file names, with a leading and trailing newline."""
        return '\n' + '\n'.join(self._VICTIM_NAMES) + '\n'
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    Dupletor.spawn()
@jakob-hede
Copy link
Author

Output:

/usr/bin/python3 /opt/projects/challenge_duplicate_file_deletor/py/dupletor.py victims
spawn Dupletor
duplete "/opt/projects/challenge_duplicate_file_deletor/py/victims"
originals: ---------------------------------------------------------------------
 - <.... "README.md">
 - <.... "abc">
 - <.... "def.aaa.bbb">
 - <.... "def.aaa.bbb.ccc">
 - <.... "faang.png">
 - <.... "package.abc">
 - <.... "package.json">
 - <.... "suffixy 12.34">
 - <.... "suffixy 12.34 xyz">
 - <COPY "Screenshot 2023-09-04 at 3.01.45 PM copy">
 - <COPY "index copy.html">
 - <COPY "package copy 2.xyz">
dupes: -------------------------------------------------------------------------
 - <COPY "README copy.md">
 - <COPY "Screenshot 2023-09-04 at 3.01.45 PM copy 2">
 - <COPY "abc copy">
 - <COPY "abc copy 2">
 - <COPY "faang copy.png">
 - <COPY "index copy 2.html">
 - <COPY "index copy 3.html">
 - <COPY "package copy.json">
 - <COPY "package copy 2.json">
 - <COPY "suffixy 12.34 copy">
 - <COPY "suffixy 12.34 copy 2">
 - <COPY "suffixy 12.34 xyz copy">
 - <COPY "suffixy 12.34 xyz copy 2">
Do some actual logistics ...

Process finished with exit code 0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment