Skip to content

Instantly share code, notes, and snippets.

@RenatoExpert
Created January 27, 2024 16:10
Show Gist options
  • Save RenatoExpert/43b6f2058ec2fc6a442639bca4cce461 to your computer and use it in GitHub Desktop.
Save RenatoExpert/43b6f2058ec2fc6a442639bca4cce461 to your computer and use it in GitHub Desktop.
import os
import time
import hashlib
import csv
# Cache Handler
# Location of the on-disk hash cache. Each row holds a file path and its
# MD5 hex digest separated by ';' (fields quoted with '|' when needed).
# load_cache() reads this file and save_cache() rewrites it from scratch.
cache_file = "/tmp/cache_md5"
def load_cache(path=None):
    """Load the path -> MD5 mapping stored in the cache file.

    Args:
        path: Cache file to read. Defaults to the module-level ``cache_file``.

    Returns:
        A dict mapping each cached file path to its MD5 hex digest. The dict
        is empty when the cache file does not exist yet (e.g. on first run).
    """
    if path is None:
        path = cache_file
    cache = {}
    try:
        csvfile = open(path, newline='')
    except FileNotFoundError:
        # No cache yet: the caller will simply hash every file from scratch.
        return cache
    with csvfile:
        reader = csv.reader(csvfile, delimiter=';', quotechar='|')
        for row in reader:
            # Blank or truncated rows (e.g. from an interrupted write) are
            # skipped instead of aborting the whole run on a failed unpack.
            if len(row) != 2:
                continue
            file, hashed_sum = row
            cache[file] = hashed_sum
    return cache
def save_cache(data, path=None):
    """Write (file path, MD5 hex digest) rows to the cache file.

    The file is truncated and rewritten; rows use ';' as the delimiter and
    '|' as the quote character, the same dialect ``load_cache`` reads.

    Args:
        data: Iterable of two-item rows ``(file, hash_sum)``.
        path: Destination file. Defaults to the module-level ``cache_file``.
    """
    if path is None:
        path = cache_file
    print("Saving cache at " + path + "...")
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # Unpacking keeps the two-column contract explicit: a row of any
        # other length raises instead of silently producing a bad cache.
        for file, hash_sum in data:
            writer.writerow([file, hash_sum])
    print("Cache saved with success")
# Render engine
# Width (in columns) used by render() to align its output.
try:
    terminal_size = os.get_terminal_size().columns
except (AttributeError, ValueError, OSError):
    # stdout is not attached to a terminal (pipe, redirect, cron job, closed
    # stream), so the size query fails; fall back to the conventional 80.
    terminal_size = 80
def render(pair, done, total):
    """Print one "left => right" line followed by a progress line.

    Args:
        pair: Two strings; the first is shown on the left, the second after
            the "=> " marker.
        done: Number of items already processed.
        total: Total number of items; used as the percentage denominator.
    """
    left, right = pair
    # Pad the left text so the whole line ends 3 columns short of the
    # terminal width; when the texts are too long no padding is added.
    padded = left.ljust(terminal_size - len(right) - 6)
    line = f"{padded}=> {right}"
    ratio = round(100 * done / total, 1)
    status = f"*** Current status: {ratio}% ***\t"
    print('\r' + line, '\n' + status, sep='', end='')
# Indexing all files
# Substrings that exclude a file from the scan when they occur anywhere in
# its path. NOTE: this is a plain substring match, so ".git" also excludes
# names such as ".gitignore" and "Work" also excludes "Workbook.txt".
EXCLUDED_FRAGMENTS = (".git", ".config", "Work")


def is_excluded(path):
    """Return True when ``path`` contains any of ``EXCLUDED_FRAGMENTS``."""
    return any(fragment in path for fragment in EXCLUDED_FRAGMENTS)


path_list = os.walk(".")
targets = []
for root, dirs, files in path_list:
    for file in files:
        path = os.path.join(root, file)
        # Every fragment must be tested with `in path`: a bare non-empty
        # string is always truthy and would exclude every single file.
        if not is_excluded(path):
            targets.append(path)
total = len(targets)
print(str(total) + " files found")
# Getting MD5 sum for each file
def md5_of_file(path, chunk_size=1024 * 1024):
    """Return the MD5 hex digest of the file at ``path``.

    The file is read in pieces of ``chunk_size`` bytes so a large file is
    never held in memory whole, and the handle is closed when hashing ends.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        while chunk := stream.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()


# NOTE: the cache is keyed by path only, so a file whose content changed
# since it was cached keeps its old digest until the cache file is removed.
cache = load_cache()
hash_by_path = []
init_sums = time.time()
for file in targets:
    if file in cache:
        hash_sum = cache[file]
    else:
        try:
            hash_sum = md5_of_file(file)
        except OSError as error:
            # Dangling symlinks, permission errors, files removed mid-scan:
            # report and move on rather than losing the whole run.
            print("\nSkipping unreadable file " + file + ": " + str(error))
            continue
    pair = (file, hash_sum)
    # Progress is reported before the append, i.e. as "files finished so far".
    render(pair, len(hash_by_path), total)
    hash_by_path.append(pair)
end_sums = time.time()
print("MD5 Hash sums done with success")
save_cache(hash_by_path)
# Reverse index
# Group the scanned paths by digest: {hash_sum: [file, ...]}. Paths keep
# the order in which they appear in hash_by_path.
by_hash = {}
for file, hash_sum in hash_by_path:
    by_hash.setdefault(hash_sum, []).append(file)
# Check identical files
def input_loop(question, options):
    """Keep asking ``question`` until the reply is one of ``options``.

    Args:
        question: Text printed before every prompt.
        options: Container of accepted replies.

    Returns:
        The accepted reply, exactly as it was typed.
    """
    while True:
        print(question)
        reply = input("Input: ")
        if reply not in options:
            print("Bad answer")
            continue
        return reply
for hash_sum in by_hash:
files = by_hash[hash_sum]
length = len(files)
if length != 1:
while True:
print('\t *** Time to work ***')
# Show repetitions
i = 1
for file in files:
print(str(i) + ')\t' + file)
i += 1
# Stay or jump
instructions = ["q - quit", "n - next", "s - select"]
for instruction in instructions:
print(instruction)
stay = input_loop("Work on that one or go next", ["q", "n", "s"])
match stay:
case "q":
exit()
case "n":
break
case "s":
# Choose file
question = "Choose a file to decide: [a - all] [1-" + str(length) + "]"
options = [str(number) for number in range(1, length + 1)]
options.append("a")
file_input = input_loop(question, options)
selected_files = files if file_input == "a" else [files[int(file_input) - 1]]
# Select action
instructions = ["d - delete forever", "k - keep as it is", "t - send to trash"]
for instruction in instructions:
print(instruction)
question = "Choose an action to that file"
options = ["d", "k", "t"]
selected_action = input_loop(question, options)
# Confirm
print("File: " + str(selected_files))
print("Action: " + selected_action)
confirm = input_loop("Are you sure? [y/n]", ["y", "n"])
if confirm == "y":
match selected_action:
case "d":
for file in selected_files:
print("Deleting " + file)
os.remove(file)
case "k":
print("Doing nothing...")
case "t":
print("Sending to trash...")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment