Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
require 'digest/md5'
# usage: run this in the root directory of your iTunes Music folder, or wherever, and pipe the output to a file
# next, pipe the output of that file through `sort` to a new file
# now, use the next script on that file
# Build the "<md5> <path>" output line for one directory entry.
#
# path - String path as returned by Dir[].
#
# Returns the formatted String, or nil when the entry is not a regular file
# (directories and other non-files have no content to hash).
#
# NOTE(review): the skip condition and the argument to the digest call were
# truncated in the listing this was rebuilt from; "skip non-files, hash the
# file's content" is the presumed intent -- confirm.
def md5_line(path)
  return nil unless File.file?(path)

  # Digest::MD5.file streams the file in chunks, so a large media file is
  # never loaded into memory in one piece.
  "#{Digest::MD5.file(path).hexdigest} #{path}"
end

# Walk everything beneath the current directory and print one line per file.
ls = Dir['**/*']
ls.each_with_index do |f, i|
  # Progress (entries remaining) goes to STDERR every 100 entries so that
  # STDOUT stays clean for redirection into a file.
  STDERR.puts ls.length - i if (i % 100).zero?

  line = md5_line(f)
  puts line if line
end
# use at your own risk!
# read the code
# enjoy!
# usage: cat filesort.txt | ruby removedupes.rb
# where filesort.txt is the output of the `sort` command from the last script
# Split one "<md5> <path>" input line into its two fields.
#
# line - String line as read from the sorted listing (trailing newline ok).
#
# Returns [hash, path], or nil for a blank or malformed line.
def parse_hash_line(line)
  # Split on the FIRST single space only. Splitting on every space and
  # re-joining collapses runs of spaces inside a path, which yields names
  # that no longer exist on disk and therefore cannot be deleted.
  hash, name = line.chomp.split(/ /, 2)
  return nil if hash.nil? || hash.empty? || name.nil? || name.empty?

  [hash, name]
end

# Decide which of a set of identical files survives.
#
# paths - Array of String paths whose contents have the same hash.
#
# Returns [keep, doomed]: keep is the path with the shortest name, doomed is
# every other distinct path. Paths are de-duplicated first so a path that
# appears twice in the input can never be both kept and deleted.
def split_duplicates(paths)
  unique = paths.uniq
  keep = unique.min_by { |p| p.length }
  [keep, unique - [keep]]
end

# Delete all but the shortest-named file of a group of identical files and
# report the outcome on STDOUT ("error <path>" per failed delete, then
# "kept <path>").
#
# NOTE(review): the delete call itself was missing from the listing this was
# rebuilt from; File.delete with an "error" line on failure is the presumed
# original behaviour -- confirm before running on real data.
def remove_duplicates(paths)
  keep, doomed = split_duplicates(paths)
  doomed.each do |f|
    begin
      File.delete(f)
    rescue SystemCallError
      # grep the output for ^error to find files that must be removed by hand
      puts "error #{f}"
    end
  end
  puts "kept #{keep}"
end

# Input is sorted, so identical hashes are adjacent: collect each run of
# equal hashes into a group and process the group when the hash changes.
lasthash = nil
group = []
while (line = gets)
  hash, name = parse_hash_line(line)
  next unless hash

  if hash != lasthash
    remove_duplicates(group) if group.length > 1
    group = []
  end
  group << name
  lasthash = hash
end
# Flush the final run; without this, duplicates at the very end of the input
# are never processed.
remove_duplicates(group) if group.length > 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment