Skip to content

Instantly share code, notes, and snippets.

@esaborit4code
Last active September 28, 2018 18:47
Show Gist options
  • Save esaborit4code/f90fab9b2fe079639ea91a7c1cc75f67 to your computer and use it in GitHub Desktop.
Save esaborit4code/f90fab9b2fe079639ea91a7c1cc75f67 to your computer and use it in GitHub Desktop.
Ignore or rename files with duplicate names
#!/usr/bin/env ruby
require 'fileutils'
# NOTE(review): byebug is a debugger gem and nothing in the visible code
# references it; confirm whether this require is still needed, since it makes
# the script fail to load where the gem is not installed.
require 'byebug'
# Forwarded as the `verbose:` option of the FileUtils call in the `copy` helper.
VERBOSE = false
# Forwarded as the `noop:` option of the FileUtils call in the `copy` helper
# (FileUtils skips the actual file operation when `noop` is true).
DRY_RUN = false
require 'digest'
# https://github.com/tonytonyjan/exif
# gem install exif
require 'exif'

# Decides what to do with an input file whose name already exists in the
# output directory.
#
# The input file is compared against one existing output file (the original
# name, or the name with a numeric suffix) and one of three things happens:
#
# * same EXIF date but different size -> the input is copied to the conflicts
#   directory (+:copied_to_conflicts+);
# * same EXIF date and size, or same MD5 digest -> nothing is copied
#   (+:duplicated+);
# * otherwise -> the input is copied next to the existing file under the next
#   free suffixed name (+:copied_with_new_name+), re-running the same checks
#   against any suffixed file that already exists.
#
# Copying is delegated to the top-level +copy+ helper.
class Conflict
  SUFFIX_SEPARATOR = '-'.freeze

  # @param input_file_path [String] path of the file being imported
  # @param original_output_file_path [String] path in the output directory
  #   that carries the same file name as the input
  # @param conflicts_output_path [String] directory receiving conflicted files
  #   (a trailing slash is accepted but not required)
  # @param suffix [Integer, nil] suffix of the existing output file to compare
  #   against; nil compares against the unsuffixed name
  def initialize(input_file_path, original_output_file_path, conflicts_output_path, suffix = nil)
    @suffix = suffix
    @input_file_path = input_file_path
    @file_name = File.basename(@input_file_path)
    @original_output_file_path = original_output_file_path
    @output_dir = File.dirname(@original_output_file_path)
    @output_file_path = suffixed_output_file_path(@suffix)
    @conflicts_output_path = conflicts_output_path
  end

  # Applies the resolution rules described on the class.
  #
  # @return [Symbol] :copied_to_conflicts, :duplicated or :copied_with_new_name
  def resolve
    if conflicted?
      # File.join tolerates a conflicts path with or without a trailing slash.
      copy_to File.join(@conflicts_output_path, @file_name)
      return :copied_to_conflicts
    end
    return :duplicated if duplicated?

    resolve_with_new_name
  end

  private

  # Same shot date but different size: neither file can safely be dropped.
  def conflicted?
    same_date? && !same_size?
  end

  def duplicated?
    (same_date? && same_size?) || same_sha?
  end

  # Copies the input under the next suffix, or, when that name is taken too,
  # repeats the whole resolution against the file that holds it.
  def resolve_with_new_name
    new_suffix = @suffix.to_i + 1 # nil.to_i is 0, so the first suffix is 1
    new_output_file_path = suffixed_output_file_path(new_suffix)
    if File.exist?(new_output_file_path)
      Conflict.new(@input_file_path, @original_output_file_path, @conflicts_output_path, new_suffix).resolve
    else
      copy_to new_output_file_path
      :copied_with_new_name
    end
  end

  def same_size?
    File.size(@input_file_path) == File.size(@output_file_path)
  end

  # True only when the input has a readable EXIF date equal to the output's.
  # Two files that both lack a date are NOT considered to share one.
  # Memoized because both conflicted? and duplicated? ask, and each answer
  # requires parsing the EXIF data of two files.
  def same_date?
    return @same_date if defined?(@same_date)

    input_date = exif_file_date(@input_file_path)
    output_date = exif_file_date(@output_file_path)
    @same_date = !input_date.nil? && input_date == output_date
  end

  def same_sha?
    # NOTE: MD5 seems to change each time a file is imported
    Digest::MD5.file(@input_file_path) == Digest::MD5.file(@output_file_path)
  end

  # @return the EXIF "date time original" value, or nil when the file has no
  #   readable EXIF data
  def exif_file_date(file_path)
    # Block form closes the handle as soon as the tag has been read.
    File.open(file_path) { |file| Exif::Data.new(file).date_time_original }
  rescue Exif::NotReadable
    nil
  end

  # "photo.jpg" with suffix 2 becomes "photo-2.jpg"; nil suffix leaves the name as is.
  def suffixed_file_name(file_name, suffix)
    return file_name unless suffix

    file_extension = File.extname(file_name)
    file_name_without_extension = File.basename(file_name, file_extension)
    "#{file_name_without_extension}#{SUFFIX_SEPARATOR}#{suffix}#{file_extension}"
  end

  def suffixed_output_file_path(suffix)
    File.join(@output_dir, suffixed_file_name(@file_name, suffix))
  end

  def copy_to(output_file_path)
    copy(@input_file_path, output_file_path)
  end
end
# Copies the file at +from+ to +to+ through FileUtils, forwarding the
# script-wide DRY_RUN and VERBOSE flags as the +noop+ and +verbose+ options.
def copy(from, to)
  options = { noop: DRY_RUN, verbose: VERBOSE }
  FileUtils.cp(from, to, **options)
end
# Imports every regular, non-hidden file found directly inside the input
# directory into the output directory.
#
# Files whose name does not exist in the output directory are copied as is.
# Files whose name already exists are handed to Conflict, and a summary of the
# three possible resolutions is printed at the end. Conflicted files go to a
# "conflicts" sub-directory of the output directory.
#
# @param argv [Array<String>] input directory followed by output directory;
#   relative paths are resolved against the current working directory and a
#   trailing slash is optional. Defaults to the command-line arguments.
# @return [nil]
def run(argv = ARGV)
  abort "Usage: #{File.basename($PROGRAM_NAME)} INPUT_DIR OUTPUT_DIR" if argv.size < 2

  # expand_path makes the paths absolute and normalises slashes, so the result
  # no longer depends on whether the caller typed a trailing "/".
  input_path = File.expand_path(argv[0])
  output_path = File.expand_path(argv[1])
  # The empty last component keeps the trailing slash Conflict has always received.
  conflicts_output_path = File.join(output_path, 'conflicts', '')

  input_file_names = regular_file_names(input_path)
  output_file_names = regular_file_names(output_path)

  # mkdir_p also creates a missing output directory and, unlike Dir.mkdir,
  # can honour the dry-run flag.
  FileUtils.mkdir_p(conflicts_output_path, noop: DRY_RUN, verbose: VERBOSE)

  conflicting_file_names = input_file_names & output_file_names
  non_conflicting_file_names = input_file_names - conflicting_file_names

  puts "Copying #{non_conflicting_file_names.size} files without conflicts"
  non_conflicting_file_names.each do |file_name|
    copy(File.join(input_path, file_name), File.join(output_path, file_name))
  end

  puts "Resolving #{conflicting_file_names.size} file name conflicts"
  results = {
    duplicated: [],
    copied_with_new_name: [],
    copied_to_conflicts: []
  }
  conflicting_file_names.each do |file_name|
    input_file_path = File.join(input_path, file_name)
    output_file_path = File.join(output_path, file_name)
    resolution = Conflict.new(input_file_path, output_file_path, conflicts_output_path).resolve
    results[resolution] << file_name
  end

  puts "\t#{results[:duplicated].size} duplicates were ignored\n"\
       "\t#{results[:copied_with_new_name].size} were copied with new name\n"\
       "\t#{results[:copied_to_conflicts].size} were copied to conflicts"
end

# Names of the regular, non-hidden files directly inside +dir_path+, sorted;
# empty when +dir_path+ is not a directory. Listing entries instead of globbing
# keeps directory names containing glob characters (e.g. "[" or "{") working.
def regular_file_names(dir_path)
  return [] unless File.directory?(dir_path)

  Dir.entries(dir_path)
     .reject { |name| name.start_with?('.') }
     .select { |name| File.file?(File.join(dir_path, name)) }
     .sort
end
# Only start the import when this file is executed as a script, so that
# loading it from another file (e.g. to exercise Conflict) has no side effects.
run if $PROGRAM_NAME == __FILE__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment