afresh1/clean-dupes.pl

## clean-dupes.pl
#!/usr/bin/env perl
use v5.16;
use warnings;

#
# Copyright (c) 2021 Andrew Hewus Fresh <andrew@afresh1.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

use File::Find qw< find >;
use Digest::SHA;

HotKey->import('readkey');

@ARGV = '.' unless @ARGV;

my $alg = 'sha256';

my %files;
find( sub {
    return unless -f $_;

    my %info = (
        #name => $_,
        #dir  => $File::Find::dir,
        path => $File::Find::name,
    );

    #@info{qw< size atime mtime ctime >} = ( stat _ )[ 7 .. 10 ];

    push @{ $files{ -s _ } }, \%info;
}, @ARGV );

foreach my $size ( sort { $b <=> $a } keys %files ) {
    next unless @{ $files{$size} } > 1;

    my %dupes;
    foreach my $info ( @{ $files{$size} } ) {
        my $digest = Digest::SHA->new($alg)
            ->addfile( $info->{path} )
            ->hexdigest;
        push @{ $dupes{$digest} }, $info;
    }

    foreach my $digest ( sort keys %dupes ) {
        next unless @{ $dupes{$digest} } > 1;

        my $delete = pick_dupes( $dupes{$digest} );
        unlink or warn "Unable to unlink $_: $!"
            for map { $_->{path} } @{$delete};
    }
}

sub pick_dupes {
    my ($dupes) = @_;

    my $n     = 0;
    my @dupes = map { $_->{n} = ++$n; $_ }
        sort { length $b->{path} <=> length $a->{path} } @{$dupes};
    $dupes[0]{keep} = 1;

    die "Too many dupes!" if $n > 9;

    while (1) {
        say "\n   * indicates files to keep.";
        printf "%1s [%1s] %s\n", $_->{n}, $_->{keep} ? '*' : ' ', $_->{path}
            for @dupes;

        printf
            "1-$n to toggle, s[w]ap, keep [a]ll [n]one, [d]elete, [q]uit: ";
        STDOUT->flush;

        my $key = readkey();
        print "\n";

        if ( $key =~ /^[1-9]$/ and $#dupes >= $key - 1 ) {
            my $i = $key - 1;
            $dupes[$i]{keep} = $dupes[$i]{keep} ? undef : 1;
        }
        elsif ( fc $key eq 'w' ) {
            $_->{keep} = !$_->{keep} for @dupes;
        }
        elsif ( fc $key eq 'a' ) {
            $_->{keep} = 1 for @dupes;
            $key = 'd';
        }
        elsif ( fc $key eq 'n' ) {
            delete $_->{keep} for @dupes;
            $key = 'd';
        }
        elsif ( fc $key eq 'q' ) {
            exit;
        }

        if ( fc $key eq 'd' ) {
            my $sure = 'y';
            if ( !grep { $_->{keep} } @dupes ) {
                print "Are you sure you want to remove all files? y/n ";
                STDOUT->flush;
                $sure = readkey();
                print "\n";
            }
            return [ grep { !$_->{keep} } @dupes ]
                if fc $sure eq 'y';
        }
    }
}

BEGIN {
# HotKey.pm - From perlfaq8
package HotKey;

use strict;
use warnings;

use parent 'Exporter';
our @EXPORT_OK = qw(cbreak cooked readkey);

use POSIX qw(:termios_h);
my ( $term, $oterm, $echo, $noecho, $fd_stdin );

$fd_stdin = fileno(STDIN);
$term     = POSIX::Termios->new();
$term->getattr($fd_stdin);
$oterm = $term->getlflag();

$echo   = ECHO | ECHOK | ICANON;
$noecho = $oterm & ~$echo;

sub cbreak {

    #$term->setlflag($noecho);  # ok, so i don't want echo either
    $term->setlflag( $oterm & ~ICANON );
    $term->setcc( VTIME, 1 );
    $term->setattr( $fd_stdin, TCSANOW );
}

sub cooked {
    $term->setlflag($oterm);
    $term->setcc( VTIME, 0 );
    $term->setattr( $fd_stdin, TCSANOW );
}

sub readkey {
    my $key = '';
    cbreak();
    sysread( STDIN, $key, 1 );
    cooked();
    return $key;
}

END { cooked() }

1;
}
	#!/usr/bin/env perl
	use v5.16;
	use warnings;

	#
	# Copyright (c) 2021 Andrew Hewus Fresh <andrew@afresh1.com>
	#
	# Permission to use, copy, modify, and distribute this software for any
	# purpose with or without fee is hereby granted, provided that the above
	# copyright notice and this permission notice appear in all copies.
	#
	# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
	# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
	# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
	# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
	# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
	# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
	# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
	#

	use File::Find qw< find >;
	use Digest::SHA;

	HotKey->import('readkey');

	@ARGV = '.' unless @ARGV;

	my $alg = 'sha256';

	my %files;
	find( sub {
	return unless -f $_;

	my %info = (
	#name => $_,
	#dir => $File::Find::dir,
	path => $File::Find::name,
	);

	#@info{qw< size atime mtime ctime >} = ( stat _ )[ 7 .. 10 ];

	push @{ $files{ -s _ } }, \%info;
	}, @ARGV );

	foreach my $size ( sort { $b <=> $a } keys %files ) {
	next unless @{ $files{$size} } > 1;

	my %dupes;
	foreach my $info ( @{ $files{$size} } ) {
	my $digest = Digest::SHA->new($alg)
	->addfile( $info->{path} )
	->hexdigest;
	push @{ $dupes{$digest} }, $info;
	}

	foreach my $digest ( sort keys %dupes ) {
	next unless @{ $dupes{$digest} } > 1;

	my $delete = pick_dupes( $dupes{$digest} );
	unlink or warn "Unable to unlink $_: $!"
	for map { $_->{path} } @{$delete};
	}
	}

	sub pick_dupes {
	my ($dupes) = @_;

	my $n = 0;
	my @dupes = map { $_->{n} = ++$n; $_ }
	sort { length $b->{path} <=> length $a->{path} } @{$dupes};
	$dupes[0]{keep} = 1;

	die "Too many dupes!" if $n > 9;

	while (1) {
	say "\n * indicates files to keep.";
	printf "%1s [%1s] %s\n", $_->{n}, $_->{keep} ? '*' : ' ', $_->{path}
	for @dupes;

	printf
	"1-$n to toggle, s[w]ap, keep [a]ll [n]one, [d]elete, [q]uit: ";
	STDOUT->flush;

	my $key = readkey();
	print "\n";

	if ( $key =~ /^[1-9]$/ and $#dupes >= $key - 1 ) {
	my $i = $key - 1;
	$dupes[$i]{keep} = $dupes[$i]{keep} ? undef : 1;
	}
	elsif ( fc $key eq 'w' ) {
	$_->{keep} = !$_->{keep} for @dupes;
	}
	elsif ( fc $key eq 'a' ) {
	$_->{keep} = 1 for @dupes;
	$key = 'd';
	}
	elsif ( fc $key eq 'n' ) {
	delete $_->{keep} for @dupes;
	$key = 'd';
	}
	elsif ( fc $key eq 'q' ) {
	exit;
	}

	if ( fc $key eq 'd' ) {
	my $sure = 'y';
	if ( !grep { $_->{keep} } @dupes ) {
	print "Are you sure you want to remove all files? y/n ";
	STDOUT->flush;
	$sure = readkey();
	print "\n";
	}
	return [ grep { !$_->{keep} } @dupes ]
	if fc $sure eq 'y';
	}
	}
	}

	BEGIN {
	# HotKey.pm - From perlfaq8
	package HotKey;

	use strict;
	use warnings;

	use parent 'Exporter';
	our @EXPORT_OK = qw(cbreak cooked readkey);

	use POSIX qw(:termios_h);
	my ( $term, $oterm, $echo, $noecho, $fd_stdin );

	$fd_stdin = fileno(STDIN);
	$term = POSIX::Termios->new();
	$term->getattr($fd_stdin);
	$oterm = $term->getlflag();

	$echo = ECHO \| ECHOK \| ICANON;
	$noecho = $oterm & ~$echo;

	sub cbreak {

	#$term->setlflag($noecho); # ok, so i don't want echo either
	$term->setlflag( $oterm & ~ICANON );
	$term->setcc( VTIME, 1 );
	$term->setattr( $fd_stdin, TCSANOW );
	}

	sub cooked {
	$term->setlflag($oterm);
	$term->setcc( VTIME, 0 );
	$term->setattr( $fd_stdin, TCSANOW );
	}

	sub readkey {
	my $key = '';
	cbreak();
	sysread( STDIN, $key, 1 );
	cooked();
	return $key;
	}

	END { cooked() }

	1;
	}