Skip to content

Instantly share code, notes, and snippets.

@inversion
Last active April 23, 2022 21:38
Show Gist options
  • Save inversion/4f2ef9264f0ad9b2360e to your computer and use it in GitHub Desktop.
Save inversion/4f2ef9264f0ad9b2360e to your computer and use it in GitHub Desktop.
Delete fdupes duplicates by directory
#!/usr/bin/env python
import fileinput
import os.path as path
import re
class Processor(object):
    """Interactively select which files from fdupes output to delete.

    The input is read as groups of duplicate paths, one path per line, with
    an empty line terminating each group.  For every group the user picks
    files to delete until a single file is left.  A choice may be suffixed
    with "d" to condemn the chosen file's directory: duplicates found in
    that directory in later groups are then marked for deletion without
    prompting (always keeping at least one file per group).

    Nothing is deleted here; the selected paths are only collected in
    ``condemnedFiles``.

    The code sticks to single-argument ``print(...)`` calls so that it runs
    unchanged on both Python 2 and Python 3.
    """

    # A group index optionally followed by "d" (condemn the directory too).
    _CHOICE_RE = re.compile(r'^(\d+)(d?)$')

    def __init__(self):
        # Paths selected for deletion, in the order they were selected.
        self.condemnedFiles = []
        # Directories whose later duplicates are condemned automatically.
        # Only the keys are used.
        self.condemnedDirs = {}

    def prompt_for_choice(self, validChoices):
        """Ask the user which duplicate to delete and record the answer.

        Re-prompts until the answer is a valid index from ``validChoices``,
        optionally followed by "d".  The chosen entry is removed from
        ``validChoices`` (the dict is mutated in place) and appended to
        ``condemnedFiles``.  With the "d" suffix the file's directory is
        also added to ``condemnedDirs``.

        :param validChoices: non-empty dict mapping index -> file path.
        """
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3

        prompt = 'Enter choice [%d-%d][d]:' % (min(validChoices),
                                               max(validChoices))
        while True:
            match = self._CHOICE_RE.match(read_line(prompt))
            if match and int(match.group(1)) in validChoices:
                break

        toDelete = validChoices.pop(int(match.group(1)))
        self.condemnedFiles.append(toDelete)
        print('Will delete %s' % toDelete)

        if match.group(2):
            directory = path.split(toDelete)[0]
            self.condemnedDirs[directory] = True
            print('Will delete all subsequent duplicates found in %s'
                  % directory)

    def _process_group(self, group, groupIndex):
        """Condemn or offer each file of one non-empty duplicate group."""
        validChoices = {}
        print('')
        print('--- Group %d ---' % groupIndex)
        condemnedInGroup = 0
        for index, dupe in enumerate(group, 1):
            directory = path.split(dupe)[0]
            if directory not in self.condemnedDirs:
                validChoices[index] = dupe
                print('[%d] %s' % (index, dupe))
                continue
            # Every other file of the group is already condemned: keep this
            # one so the group does not lose all of its copies.
            if condemnedInGroup == len(group) - 1:
                print('All duplicates in group are in condemned directories, preserving %s so there is a file left from the group' % dupe)
                validChoices[index] = dupe
                break
            print('%s is in a condemned directory so this duplicate will be deleted' % dupe)
            self.condemnedFiles.append(dupe)
            condemnedInGroup += 1

        while len(validChoices) > 1:
            self.prompt_for_choice(validChoices)
        # dict.values() is not indexable on Python 3, hence next(iter(...)).
        print('No duplicates of %s left to delete'
              % next(iter(validChoices.values())))

    def run(self, files=None):
        """Read duplicate groups and process them one by one.

        :param files: passed straight to ``fileinput.input``; the default
            ``None`` keeps the original behaviour of reading the files named
            on the command line (or standard input when there are none).
        """
        group = []
        groupIndex = 1
        stream = fileinput.input(files)
        try:
            for line in stream:
                # Strip only the line terminator: the last line of a file
                # may not have one, and blindly dropping the final character
                # would then truncate the path.
                line = line.rstrip('\r\n')
                if line:
                    group.append(line)
                    continue
                # An empty line closes the current group.  Leading or
                # repeated empty lines yield an empty group, which is
                # skipped.
                if group:
                    self._process_group(group, groupIndex)
                    groupIndex += 1
                    group = []
        finally:
            stream.close()
        # Input that does not end with an empty line still has a pending
        # group at this point.
        if group:
            self._process_group(group, groupIndex)
if __name__ == '__main__':
    # Script entry point: print usage help, run the interactive selection
    # and save the list of condemned files.  Nothing is deleted here.
    OUTPUT_FILENAME = 'toDelete.log'

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    # The shell recipes use `IFS= read -r` and quote "$f" everywhere so that
    # paths containing spaces or backslashes are handled correctly.
    banner = (
        'fdupes output processor to select duplicates to delete and remember which directories are unwanted.',
        'Usage:',
        '',
        'fdupes -r . > fdupes_out.log',
        'python fdupes_delete_by_directory.py fdupes_out.log',
        '',
        'Entering "d" following a duplicate name will cause all subsequent duplicates in that directory to be condemned for deletion.',
        'Files will not actually be deleted by this script, it will save %s containing the condemned files.' % OUTPUT_FILENAME,
        '',
        'Examples of how to process %s in shell:' % OUTPUT_FILENAME,
        '',
        '# Delete all duplicates',
        'while IFS= read -r f; do rm "$f"; done <%s' % OUTPUT_FILENAME,
        '',
        '# Move all files to a "dead" directory while preserving the original directory structure',
        'while IFS= read -r f; do mkdir -p "../dead/$(dirname "$f")" && mv -v "$f" "../dead/$(dirname "$f")/"; done <%s' % OUTPUT_FILENAME,
        '',
    )
    for text in banner:
        print(text)

    processor = Processor()
    processor.run()

    # One path per line.  When nothing was condemned the file is left empty
    # rather than holding a single blank line, which the shell loops above
    # would read as an empty file name.
    with open(OUTPUT_FILENAME, 'w') as fp:
        fp.writelines(name + '\n' for name in processor.condemnedFiles)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment