public
Created — forked from jchris/hashfiles.rb

  • Download Gist
hashfiles.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14
require 'digest/md5'
 
# usage: run this from the root directory of your iTunes Music folder (or any directory tree)
# and redirect the output to a file, e.g. `ruby hashfiles.rb > files.txt`
# next, sort that file into a new one, e.g. `sort files.txt > filesort.txt`
# finally, feed the sorted file to the next script (removedupes.rb)
 
# Returns the MD5 hex digest of the file at +path+.
#
# Digest::MD5.file streams the file in binary mode, so large media files are
# never loaded into memory in one piece and no text-mode translation can
# alter the bytes being hashed.
def md5_hexdigest_of(path)
  Digest::MD5.file(path).hexdigest
end

# Every entry below the current directory (Dir's glob skips dotfiles).
paths = Dir['**/*']

paths.each_with_index do |path, index|
  # Progress countdown on stderr so it never pollutes the hash list on stdout.
  STDERR.puts paths.length - index if (index % 100).zero?

  # Only regular files can be hashed; this skips directories as before and
  # also broken symlinks, FIFOs and sockets, which would raise or block.
  next unless File.file?(path)

  begin
    # Output format is "<md5> <path>", one file per line.
    puts "#{md5_hexdigest_of(path)} #{path}"
  rescue SystemCallError => e
    # An unreadable file should not abort a long run; report it and move on.
    STDERR.puts "skipped #{path}: #{e.message}"
  end
end
removedupes.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
# use at your own risk!
# read the code
# enjoy!
 
# usage: cat filesort.txt | ruby removedupes.rb
# where filesort.txt is the output of the `sort` command from the last script
 
# Splits one "<hash> <path>" line into [hash, path].
#
# Only the FIRST space separates the two fields; everything after it is the
# path, byte for byte. Splitting on every space and re-joining would collapse
# runs of spaces inside file names and make the later delete fail.
def parse_hash_line(line)
  hash, _separator, name = line.chomp.partition(' ')
  [hash, name]
end

# Deletes every path in +paths+ except the one with the shortest pathname,
# then reports the survivor as "kept <path>" on +out+.
#
# A path that cannot be deleted is reported as "error <path>" on +out+
# instead of aborting the run, so the output can be grepped for ^error.
def prune_group(paths, out = $stdout)
  # The same path listed twice must not be deleted and "kept" at once.
  ordered = paths.uniq.sort_by { |path| -path.length }
  keep = ordered.pop
  ordered.each do |path|
    begin
      File.delete(path)
    rescue SystemCallError
      out.puts "error #{path}"
    end
  end
  out.puts "kept #{keep}"
end

# Reads "<hash> <path>" lines from +input+, which must already be sorted so
# that identical hashes are adjacent, and prunes every run of two or more
# lines sharing a hash down to a single file.
def remove_duplicates(input, out = $stdout)
  group = []
  group_hash = nil
  input.each_line do |line|
    hash, name = parse_hash_line(line)
    next if hash.empty? || name.empty? # blank or malformed line

    if hash != group_hash
      prune_group(group, out) if group.length > 1
      group = []
      group_hash = hash
    end
    group << name
  end
  # Flush the final run: without this, duplicates at the very end of the
  # input would never be processed.
  prune_group(group, out) if group.length > 1
end

# ARGF reads stdin (or any files named on the command line), like Kernel#gets.
remove_duplicates(ARGF)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.