Skip to content

Instantly share code, notes, and snippets.

@pen
Created February 1, 2010 11:11
Show Gist options
  • Save pen/291608 to your computer and use it in GitHub Desktop.
Save pen/291608 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
#
# usage: find-dup <dir> [<dir>...]
#
use strict;
use File::Find;
use IO::File;
use Digest;
# Name of the digest algorithm handed to Digest->new.
my $digest_method = "MD5";
# Files smaller than this many bytes are ignored (1 skips empty files).
my $least_size = 1;

# Walk the given top-level paths and bucket every plain file of at least
# $least_size bytes by its size.
# Returns a hash ref mapping size => array ref of paths ($File::Find::name).
sub _files_by_size {
    my @dirs = @_;
    my %same_size;
    my %seen;
    find(
        sub {
            # File::Find has chdir()ed into the directory being scanned, so
            # the file test must use $_ (the bare name).  $File::Find::name
            # is relative to the starting directory and would point at the
            # wrong place whenever a relative top directory was given.
            return unless -f $_;
            my $size = (-s _) || 0;
            return if $size < $least_size;
            # Overlapping arguments (e.g. "a" and "a/b") visit a file twice;
            # record each path once so it is not reported against itself.
            return if $seen{$File::Find::name}++;
            push @{ $same_size{$size} }, $File::Find::name;
        },
        @dirs,
    );
    return \%same_size;
}

# Compute the hex digest of the file at $path using $digest_method.
# Returns undef (after a warning on STDERR) when the file cannot be opened
# or read, so that one unreadable file does not abort the whole run.
sub _file_digest {
    my ($path) = @_;
    my $fh = IO::File->new($path, "r");
    if (!$fh) {
        warn "find-dup: cannot open $path: $!\n";
        return;
    }
    # Digest the raw bytes; without binmode, I/O layers could alter them.
    binmode $fh;
    # Built outside the eval: an unknown algorithm is a fatal setup error.
    my $ctx = Digest->new($digest_method);
    my $hex = eval { $ctx->addfile($fh)->hexdigest };
    if (!defined $hex) {
        warn "find-dup: cannot read $path: $@";
        return;
    }
    return $hex;
}

# Find groups of files with identical size and digest under the given
# top-level paths.
# Returns a list of groups; each group is an array ref of
# { path => ..., size => ... } hash refs holding at least two entries.
# Groups are ordered by size, then first path; paths are sorted within a
# group, so the result is deterministic.
sub find_duplicate_groups {
    my @dirs = @_;
    my $same_size = _files_by_size(@dirs);
    my %same_digest;
    for my $size (keys %$same_size) {
        my $files = $same_size->{$size};
        # A file with a unique size cannot have a duplicate; skip hashing it.
        next if @$files < 2;
        for my $path (@$files) {
            my $digest = _file_digest($path);
            next unless defined $digest;
            # Key on size as well as digest so files are only ever grouped
            # with others of the same size.
            push @{ $same_digest{"$size:$digest"} },
                { path => $path, size => $size };
        }
    }
    my @groups;
    for my $files (values %same_digest) {
        next if @$files < 2;
        push @groups, [ sort { $a->{path} cmp $b->{path} } @$files ];
    }
    return sort {
        $a->[0]{size} <=> $b->[0]{size}
            or $a->[0]{path} cmp $b->[0]{path}
    } @groups;
}

# Report: one "size<TAB>path" line per file, a dashed rule after each group.
for my $files (find_duplicate_groups(@ARGV)) {
    for my $info (@$files) {
        print "$info->{size}\t$info->{path}\n";
    }
    print "-" x 80, "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment