Created
March 7, 2011 19:30
-
-
Save fxn/859046 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'digest/md5'
require 'fileutils'
require 'set'
# Let original be a collection with the names of the files in the
# original directory. Let target be a collection with the names of the
# files in the target directory.
#
# We want to find which files in original have the same content as some
# file in target, regardless of the filename.
# Prints one line for every (target, original) pair of files whose
# contents are identical, ignoring file names.
#
# Candidates are narrowed in two steps before the expensive check: two
# files can only match if they have the same size, and among same-sized
# files only if they have the same MD5 digest. The final content
# comparison confirms the match, so a digest collision cannot produce a
# false positive.
#
# NOTE: original, target, classify_by_size and classify_by_md5 must already
# be defined when this code executes.
osizes, tsizes = classify_by_size(original, target)
osizes.each do |size, os_fnames|
  next unless tsizes.key?(size)

  omd5s, tmd5s = classify_by_md5(os_fnames, tsizes[size])
  omd5s.each do |md5, om_fnames|
    next unless tmd5s.key?(md5)

    om_fnames.each do |om_fname|
      tmd5s[md5].each do |tm_fname|
        # Core File has no compare method; FileUtils.compare_file performs
        # the content comparison.
        puts "#{tm_fname} is the same as #{om_fname}" if FileUtils.compare_file(om_fname, tm_fname)
      end
    end
  end
end
# Groups the file names of each collection by file size.
#
# Both arguments may be any Enumerable of file names (a Set, an Array,
# ...); each is converted with to_set before classification.
#
# @param original [Enumerable<String>] file names of the original directory
# @param target [Enumerable<String>] file names of the target directory
# @return [Array(Hash, Hash)] two hashes, one for original and one for
#   target, each mapping a size in bytes to the Set of file names of that size
# @raise [SystemCallError] if File.size cannot stat one of the files
def classify_by_size(original, target)
  [original, target].map do |fnames|
    fnames.to_set.classify { |fname| File.size(fname) }
  end
end
# Groups the file names of each collection by the MD5 digest of the
# file contents.
#
# Both arguments may be any Enumerable of file names (a Set, an Array,
# ...); each is converted with to_set before classification.
#
# @param original [Enumerable<String>] file names of the original directory
# @param target [Enumerable<String>] file names of the target directory
# @return [Array(Hash, Hash)] two hashes, one for original and one for
#   target, each mapping a hex MD5 digest to the Set of file names with
#   that digest
# @raise [SystemCallError] if one of the files cannot be read
def classify_by_md5(original, target)
  [original, target].map do |fnames|
    fnames.to_set.classify { |fname| Digest::MD5.file(fname).hexdigest }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment