Skip to content

Instantly share code, notes, and snippets.

@filcuc
Created August 29, 2017 05:12
Show Gist options
  • Save filcuc/e788bbc84fbcd5f9c923ee313d2436dc to your computer and use it in GitHub Desktop.
Save filcuc/e788bbc84fbcd5f9c923ee313d2436dc to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import argparse
import os
import shutil
import sys
def parse_arguments():
    """Parse and validate the command-line options.

    Reads ``--input_dir`` and ``--output_dir`` from ``sys.argv``.

    Returns:
        A ``(input_dir, output_dir)`` tuple holding the two paths exactly as
        they were given on the command line.

    Exits the process with status -1 when either option is missing (after
    printing the usage line) or when either path is not an existing
    directory (after printing an error message).
    """
    parser = argparse.ArgumentParser(description='Process a directory for images')
    parser.add_argument('--input_dir')
    parser.add_argument('--output_dir')
    args = parser.parse_args()

    if not args.input_dir or not args.output_dir:
        parser.print_usage()
        sys.exit(-1)

    # isdir rather than exists: a regular file must not pass as a directory,
    # otherwise the later directory walk would silently find nothing.
    if not os.path.isdir(args.input_dir):
        print('Input directory {0} does not exist'.format(args.input_dir))
        sys.exit(-1)
    if not os.path.isdir(args.output_dir):
        print('Output directory {0} does not exist'.format(args.output_dir))
        sys.exit(-1)

    return args.input_dir, args.output_dir
def find_duplicates(files):
    """Group file paths that share the same base name.

    Args:
        files: Iterable of file paths.

    Returns:
        A list of groups, one per base name that occurs under more than one
        distinct path. Groups appear in the order their base name is first
        seen. Inside a group the first-seen path is placed last and the
        remaining paths keep their input order, e.g.
        ``['a/x.jpg', 'b/x.jpg', 'c/x.jpg']`` yields
        ``[['b/x.jpg', 'c/x.jpg', 'a/x.jpg']]``.

    A path that is listed more than once counts once; it is never reported
    as a duplicate of itself.
    """
    paths_by_name = {}  # base name -> distinct paths, in first-seen order
    name_order = []     # base names, in first-seen order
    seen_paths = set()

    # Single pass with hash lookups instead of rescanning a work list for
    # every file.
    for path in files:
        if path in seen_paths:
            continue
        seen_paths.add(path)
        name = os.path.basename(path)
        if name not in paths_by_name:
            paths_by_name[name] = []
            name_order.append(name)
        paths_by_name[name].append(path)

    result = []
    for name in name_order:
        paths = paths_by_name[name]
        if len(paths) > 1:
            # Rotate the first-seen path to the end so the group ordering
            # stays what existing callers have always received.
            result.append(paths[1:] + paths[:1])
    return result
def find_conflicting(files):
    """Report the first group of same-named files whose sizes differ.

    Files are grouped by base name with ``find_duplicates``; a group is a
    conflict when its members do not all have the same size on disk. The
    first conflicting group is printed and the search stops there.

    Args:
        files: Iterable of paths to existing files.

    Returns:
        True if a conflicting group was found, False otherwise.
    """
    for group in find_duplicates(files):
        sizes = {os.path.getsize(path) for path in group}
        # More than one distinct size means same name, different content.
        if len(sizes) > 1:
            print(group)
            return True
    return False
def main():
    """Collect every ``.jpg`` file under the input directory and check it.

    Walks the input directory recursively, gathers the full path of each
    file whose extension is exactly ``.jpg`` (the match is case-sensitive),
    and passes the list to ``find_conflicting``.
    """
    # The output directory is validated by parse_arguments but not used here.
    input_dir, _output_dir = parse_arguments()
    image_extensions = ['.jpg']

    to_copy = []
    for root, _dirs, files in os.walk(input_dir):
        to_copy.extend(
            os.path.join(root, name)
            for name in files
            if os.path.splitext(name)[1] in image_extensions
        )
    find_conflicting(to_copy)
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment