Last active
April 23, 2022 21:38
-
-
Save inversion/4f2ef9264f0ad9b2360e to your computer and use it in GitHub Desktop.
Delete fdupes duplicates by directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import fileinput | |
import os.path as path | |
import re | |
class Processor(object):
    """Interactively choose which fdupes duplicates to condemn.

    The input is fdupes-style output: one file path per line, with an
    empty line terminating each group of duplicates.

    Attributes:
        condemnedFiles: list of paths selected for deletion, in the order
            they were condemned.
        condemnedDirs: dict used as a set of directories; any duplicate
            found in one of them in a later group is condemned without
            prompting.
    """

    def __init__(self):
        self.condemnedFiles = []
        self.condemnedDirs = {}

    def prompt_for_choice(self, validChoices):
        """Ask which duplicate to condemn and record the answer.

        Keeps prompting until the answer is one of the keys of
        ``validChoices``, optionally followed by ``d``.  The chosen entry
        is removed from ``validChoices`` and appended to
        ``condemnedFiles``; with the ``d`` suffix its directory is also
        added to ``condemnedDirs``.

        Args:
            validChoices: non-empty dict mapping the displayed integer
                index to a file path.  Mutated in place.

        Raises:
            EOFError: propagated from the input function if the input
                stream ends before a valid answer is given.
        """
        # raw_input only exists on Python 2; input is its Python 3 name.
        try:
            readAnswer = raw_input
        except NameError:
            readAnswer = input

        # validChoices is not modified until a valid answer arrives, so the
        # prompt text can be built once.
        prompt = 'Enter choice [%d-%d][d]:' % (min(validChoices), max(validChoices))
        while True:
            choiceMatch = re.match(r'^(\d+)(d?)$', readAnswer(prompt).strip())
            if not choiceMatch:
                continue
            dupeIndex = int(choiceMatch.group(1))
            if dupeIndex in validChoices:
                break

        condemnDir = bool(choiceMatch.group(2))
        toDelete = validChoices.pop(dupeIndex)
        self.condemnedFiles.append(toDelete)
        print('Will delete %s' % toDelete)
        if condemnDir:
            directory = path.split(toDelete)[0]
            self.condemnedDirs[directory] = True
            print('Will delete all subsequent duplicates found in %s' % directory)

    def _process_group(self, group, groupIndex):
        """Condemn duplicates of one non-empty group until one file is left."""
        validChoices = {}
        print('')
        print('--- Group %d ---' % groupIndex)
        condemnedInGroup = 0
        for index, dupe in enumerate(group, 1):
            directory = path.split(dupe)[0]
            if directory in self.condemnedDirs:
                # Every earlier file of the group was condemned, so this
                # last one must survive or the content would be lost.
                if condemnedInGroup == len(group) - 1:
                    print('All duplicates in group are in condemned directories, preserving %s so there is a file left from the group' % dupe)
                    validChoices[index] = dupe
                    break
                print('%s is in a condemned directory so this duplicate will be deleted' % dupe)
                self.condemnedFiles.append(dupe)
                condemnedInGroup += 1
            else:
                validChoices[index] = dupe
                print('[%d] %s' % (index, dupe))
        while len(validChoices) > 1:
            self.prompt_for_choice(validChoices)
        # dict.values() is not indexable on Python 3, so take the only
        # remaining value through an iterator.
        print('No duplicates of %s left to delete' % next(iter(validChoices.values())))

    def run(self, files=None):
        """Read the fdupes listing and process it group by group.

        Args:
            files: optional file name or list of file names passed to
                ``fileinput.input``.  With the default ``None`` the files
                named in ``sys.argv[1:]`` are read, or stdin if there are
                none.
        """
        group = []
        groupIndex = 1
        stream = fileinput.input(files)
        try:
            for line in stream:
                # Strip only the line terminator; slicing off the last
                # character would damage a final line without a newline.
                line = line.rstrip('\r\n')
                if line:
                    group.append(line)
                    continue
                # Blank line: end of a group.  Consecutive blank lines
                # produce empty groups, which are skipped.
                if group:
                    self._process_group(group, groupIndex)
                    groupIndex += 1
                    group = []
            # The listing may end without a terminating blank line.
            if group:
                self._process_group(group, groupIndex)
        finally:
            # Release the module-level fileinput state even on failure.
            stream.close()
OUTPUT_FILENAME = 'toDelete.log'


def print_usage(outputFilename):
    """Print the usage banner, naming ``outputFilename`` as the result file."""
    print('fdupes output processor to select duplicates to delete and remember which directories are unwanted.')
    print('Usage:')
    print('')
    print('fdupes -r . > fdupes_out.log')
    print('python fdupes_delete_by_directory.py fdupes_out.log')
    print('')
    print('Entering "d" following a duplicate name will cause all subsequent duplicates in that directory to be condemned for deletion.')
    print('Files will not actually be deleted by this script, it will save %s containing the condemned files.' % outputFilename)
    print('')
    print('Examples of how to process %s in shell:' % outputFilename)
    print('')
    # The examples use "IFS= read -r" and quote "$f" inside dirname so that
    # paths containing spaces or backslashes are handled correctly.
    print('# Delete all duplicates')
    print('while IFS= read -r f; do rm "$f"; done <%s' % outputFilename)
    print('')
    print('# Move all files to a "dead" directory while preserving the original directory structure')
    print('while IFS= read -r f; do mkdir -p "../dead/$(dirname "$f")" && mv -v "$f" "../dead/$(dirname "$f")/"; done <%s' % outputFilename)
    print('')


def write_condemned_files(outputFilename, condemnedFiles):
    """Write one condemned path per line to ``outputFilename``.

    An empty list produces an empty file rather than a single blank line,
    so the shell loops from the usage text never see an empty file name.
    """
    with open(outputFilename, 'w') as fp:
        fp.writelines(name + '\n' for name in condemnedFiles)


def main():
    """Show the usage text, run the interactive processor, save the result."""
    print_usage(OUTPUT_FILENAME)
    processor = Processor()
    processor.run()
    write_condemned_files(OUTPUT_FILENAME, processor.condemnedFiles)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment