Skip to content

Instantly share code, notes, and snippets.

@d0k
Created June 24, 2010 14:31
Show Gist options
  • Save d0k/451517 to your computer and use it in GitHub Desktop.
Save d0k/451517 to your computer and use it in GitHub Desktop.
Turn duplicate files into hardlinks
#!/usr/bin/perl
use strict;
use warnings;
use File::Find;
use Digest::MD5;
# Maps a base filename to an array ref of every path found with that name.
my %files;

# File::Find callback: record the full path of the current entry under its
# base name in %files.
#
# Reads $_ (the entry's base name) and $File::Find::name (its full path),
# both set by File::Find before each call. Entries of every type are
# recorded here; nothing is filtered by file type in this sub.
sub wanted {
    # Skip only the directory self/parent entries. An exact comparison is
    # used so that legitimate names made of dots (e.g. "...") are kept.
    return if $_ eq '.' or $_ eq '..';

    # push autovivifies the array ref the first time a base name is seen.
    push @{ $files{$_} }, $File::Find::name;
    return;
}
# Gather files: wanted() fills %files with base name => [ paths ].
find(\&wanted, @ARGV);

# Replace $dup with a hard link to $keep without ever leaving $dup missing.
#
# The link is first created under a temporary name next to $dup and then
# renamed over $dup, so a failing link() (cross-device, permissions, ...)
# leaves the original duplicate untouched.
#
# Returns true on success. On failure returns false with $! set to the
# error of the step that failed.
sub _relink {
    my ($keep, $dup) = @_;

    my $tmp = "$dup.relink-tmp.$$";
    link($keep, $tmp) or return 0;
    return 1 if rename($tmp, $dup);

    # Clean up the temporary link, but report the rename error, not
    # whatever unlink leaves in $!.
    my $errno = $! + 0;
    unlink($tmp);
    $! = $errno;
    return 0;
}

# For every base name seen more than once, hash the plain files and turn
# each file whose content matches an earlier one into a hard link to it.
for my $base (sort keys %files) {
    my @paths = @{ $files{$base} };
    next if @paths < 2;

    # "size:digest" => { path => first path with that content,
    #                    id   => its "dev:ino", or undef if unavailable }
    my %keeper_for;

    for my $file (@paths) {
        # Plain files only. Symlinks are excluded explicitly because -f
        # follows them, and link() on a symlink may link the symlink itself.
        next if -l $file or !-f $file;

        open(my $handle, '<', $file) or next;
        binmode($handle);
        my ($dev, $ino, $size) = (stat($handle))[0, 1, 7];
        # addfile croaks on read errors; treat that as "cannot compare".
        my $digest = eval { Digest::MD5->new->addfile($handle)->digest };
        close($handle);
        next if !defined $digest or !defined $size;

        # Including the size makes an accidental digest match less likely.
        my $key = "$size:$digest";
        # Some platforms report inode 0 for every file; only trust real ones.
        my $inode_id = $ino ? "$dev:$ino" : undef;

        my $keeper = $keeper_for{$key};
        if (!$keeper) {
            $keeper_for{$key} = { path => $file, id => $inode_id };
            next;
        }

        # Already the same file (existing hard link, or the same path
        # reached through a different spelling): nothing to do.
        next if defined $inode_id
            and defined $keeper->{id}
            and $inode_id eq $keeper->{id};

        print "$file -> $keeper->{path}.";
        if (_relink($keeper->{path}, $file)) {
            print " ok.\n";
        } else {
            print " failed: $!.\n";
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment