Last active
January 21, 2016 03:14
-
-
Save codepedia/ec5bf6fc0e58a8e1ebbd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Return the File::Stat for +path+, or nil when there is nothing to stat.
##
## path - a String, or any object File.exist? accepts; it is converted with
##        to_s before being handed to File.stat.
##
## Returns a File::Stat, or nil when the path does not exist or cannot be
## stat'ed.
def stat(path)
  File.stat(path.to_s) if File.exist?(path)
rescue SystemCallError
  ## The entry can vanish (or become unreachable) between the existence
  ## check and the stat call; report that the same way as "does not exist"
  ## instead of letting an Errno::* exception escape.
  nil
end
#!/usr/bin/ruby | |
## This file finds all the duplicate files from a directory given
## at the command line.
## Released under the GPLv2 | |
## Copyright (C) tuxdna(at)gmail(dot)com | |
require 'digest/md5' | |
require 'fileutils' | |
## Parse the command line.
##   ARGV[0] - directory to scan for duplicates (required)
##   ARGV[1] - optional trash folder; when given, duplicates are moved there
##             instead of being deleted
if ARGV.empty?
  warn "Usage: #{$0} <directory> [trash-folder]"
  exit 1
end
directory = ARGV[0]
print "Name of directory given is :", directory, "\n"
## nil when no trash folder was supplied.
trashFolder = ARGV[1]
## Do not proceed unless the path is an existing directory. File.directory?
## also rejects paths that do not exist, which a File.file? test lets through.
unless File.directory?(directory)
  warn "#{directory} is not a directory"
  exit 1
end
puts "Getting the list recursively, for all the files and sub-directories."
## Glob from inside the directory instead of splicing its name into the
## pattern, so a directory name containing glob metacharacters
## ([ ] { } * ?) is taken literally. The relative results are then joined
## back onto the directory name.
filelist = Dir.chdir(directory) { Dir["**/*"] }.map { |relative| File.join(directory, relative) }
puts "Now scanning the files: "
puts "Determining file size and Filtering the directories:"
## Keep regular files only. File.file? follows symbolic links, so links are
## excluded explicitly: a link has the same content as its target and would
## be treated as a duplicate of it, and removing the target in that case
## destroys the only real copy and leaves the link dangling.
candidates = filelist.select { |filename| File.file?(filename) && !File.symlink?(filename) }
## Group by byte size; files of different sizes cannot be duplicates.
sizehash = candidates.group_by { |filename| File.size(filename) }
## prune those sizes that only a single file has
sizehash.delete_if { |_size, names| names.length <= 1 }
## Map each (size, MD5 hex digest) pair to the list of files that have it.
## Only files that already share a size with another file are hashed.
duplicates_md5 = Hash.new { |h, k| h[k] = [] }
sizehash.each do |size, files|
  files.each do |filename|
    ## Digest::MD5.file reads the file in binary chunks and closes it again,
    ## so no file handle is left open and large files are not loaded into
    ## memory in one piece.
    md5sum = Digest::MD5.file(filename).hexdigest
    ## The size is part of the key so that files of different sizes are
    ## never grouped together, whatever their digests.
    duplicates_md5[[size, md5sum]].push(filename)
  end
end
## prune those entries which do not share their digest with another file
duplicates_md5.delete_if { |_key, files| files.length <= 1 }
## For every group of identical files keep the first one and get rid of the
## rest: move them into trashFolder when one was given, delete them otherwise.

## Make sure the trash folder exists before anything is moved into it.
FileUtils.mkdir_p(trashFolder) if trashFolder

duplicates_md5.each do |_digest, files|
  puts "Following files match: "
  keep, *extras = files
  puts "keeping file: #{keep}"
  extras.each do |f|
    if trashFolder
      ## Duplicates from different directories can share a basename; pick a
      ## free name (basename, basename.1, basename.2, ...) so that a file
      ## already in the trash is never overwritten.
      basename = File.basename(f)
      target = File.join(trashFolder, basename)
      suffix = 0
      while File.exist?(target)
        suffix += 1
        target = File.join(trashFolder, "#{basename}.#{suffix}")
      end
      puts "moving to trash: #{f} -> #{trashFolder}"
      FileUtils.mv(f, target)
    else
      puts "deleting file: #{f}"
      FileUtils.rm(f)
    end
  end
  puts
end
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment