Skip to content

Instantly share code, notes, and snippets.

@tierra
Created September 30, 2014 22:11
Show Gist options
  • Save tierra/b90c4aeb73c7ad8dcef3 to your computer and use it in GitHub Desktop.
Save tierra/b90c4aeb73c7ad8dcef3 to your computer and use it in GitHub Desktop.
Recursively remove duplicate files
#!/usr/bin/env python
import os
from optparse import OptionParser
def remove_duplicates(working_path, compare_path, pretend=False,
                      compare_size=False, recurse=False):
    '''Delete files in working_path that also exist in compare_path.

    A file counts as a duplicate when a regular file with the same name
    exists directly in both directories. Contents are never compared; only
    names and, optionally, sizes.

    working_path -- directory that duplicates are removed from.
    compare_path -- directory used as the reference; never modified.
    pretend      -- when true, print what would be removed but do not
                    delete any file or directory.
    compare_size -- when true, a file is only a duplicate if its size also
                    matches the size of the file in compare_path.
    recurse      -- when true, descend into sub-directories present in both
                    paths and remove working sub-directories that are empty
                    afterwards.

    Errors from the os module (OSError) propagate to the caller.
    '''
    # Comparing a directory against itself would flag every file as a
    # duplicate and delete the only copy, so treat that case as a no-op.
    if os.path.realpath(working_path) == os.path.realpath(compare_path):
        return

    contents = os.listdir(working_path)

    for name in contents:
        working_file = os.path.join(working_path, name)
        compare_file = os.path.join(compare_path, name)
        # isfile() is already False for missing paths, so no separate
        # existence check is required.
        if not (os.path.isfile(working_file) and
                os.path.isfile(compare_file)):
            continue
        if compare_size and (os.path.getsize(working_file) !=
                             os.path.getsize(compare_file)):
            continue
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Deleting %s' % working_file)
        if not pretend:
            os.unlink(working_file)

    if not recurse:
        return

    for name in contents:
        working_dir = os.path.join(working_path, name)
        compare_dir = os.path.join(compare_path, name)
        if not (os.path.isdir(working_dir) and os.path.isdir(compare_dir)):
            continue
        remove_duplicates(working_dir, compare_dir,
                          pretend, compare_size, recurse)
        if not os.listdir(working_dir):
            print('Removing empty directory: %s' % working_dir)
            # A dry run must not touch the filesystem, directories included.
            if not pretend:
                os.rmdir(working_dir)
if __name__ == '__main__':
    # Command-line front end: three boolean switches plus exactly two
    # positional directory arguments (working path, then compare path).
    cli = OptionParser(
        usage="%prog [-p] [-s] [-r] working_path compare_path",
        version="%prog 0.1",
    )
    cli.add_option(
        "-p", "--pretend",
        dest="pretend", action="store_true", default=False,
        help="Shows the files which will be deleted without deleting them.",
    )
    cli.add_option(
        "-s", "--size",
        dest="compare_size", action="store_true", default=False,
        help="Compares the file sizes in addition to names.",
    )
    cli.add_option(
        "-r", "--recursive",
        dest="recurse", action="store_true", default=False,
        help="Make a recursive comparison through directories.",
    )

    opts, positional = cli.parse_args()

    # parser.error() prints the usage message and exits the process.
    if len(positional) != 2:
        cli.error("Incorrect number of arguments.")

    working_path, compare_path = positional
    remove_duplicates(
        working_path,
        compare_path,
        opts.pretend,
        opts.compare_size,
        opts.recurse,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment