Created
November 21, 2016 01:23
-
-
Save skinkade/a9a0ebf8855aad59edaa3cf86ca0a022 to your computer and use it in GitHub Desktop.
pHash utility in Perl6 to detect duplicate/similar images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use NativeCall; | |
### DCT hashing & Hamming distance (libpHash bindings)
# int ph_dct_imagehash(const char* file,ulong64 &hash);
#
# NativeCall binding for libpHash's DCT image hash.
#   Str            - path of the image file to hash
#   uint64 is rw   - out-parameter that receives the 64-bit hash
# Returns the library's integer status code.
#
# The C prototype above returns `int`, so the binding declares int32 (the
# NativeCall type that corresponds to C `int`) rather than a 16-bit type.
sub ph_dct_imagehash(Str, uint64 is rw)
    is native('pHash', v0)
    returns int32
    { * }
# int ph_hamming_distance(const ulong64 hash1,const ulong64 hash2);
#
# NativeCall binding for libpHash's Hamming distance between two 64-bit hashes.
#   uint64, uint64 - the two hashes to compare
# Returns the distance as an integer.
#
# The C prototype above returns `int`, so the binding declares int32 (the
# NativeCall type that corresponds to C `int`) rather than a 16-bit type.
sub ph_hamming_distance(uint64, uint64)
    is native('pHash', v0)
    returns int32
    { * }
# Entry point: find jpg/jpeg/png files under the given paths, compute a DCT
# hash for each, and print every pair of images whose hashes are identical or
# within a small Hamming distance of each other.
#
#   @paths - files and/or directories from the command line; directories are
#            descended into, so nested images are found as well.
#
# Output goes to STDOUT: one message per matching pair, one message per file
# that could not be hashed, and finally the elapsed run time.
sub MAIN(*@paths) {
    # Hamming-distance cut-offs used to classify a pair of differing hashes.
    my $likely-max   = 10;
    my $possible-max = 22;

    # Lazily walk the work list: emit image files, and push the contents of
    # any directory back onto the list so it is visited in turn.
    my $images := gather while ( @paths ) {
        my $current = @paths.pop;
        take $current if ($current.IO.f && $current.IO.extension.lc eq any <jpg jpeg png>);
        @paths.append(dir($current)>>.path) if $current.IO.d;
    }

    # Map of file path => DCT hash for every image that hashed successfully.
    my %dct;
    for $images.race -> $file {
        my uint64 $hash;
        my $status = ph_dct_imagehash($file, $hash);

        # A negative status is treated as failure, and a hash that is still
        # zero is treated as unusable. The previous test, `!(status == 1)`,
        # could never reach the error branch for a failing status, so
        # failures were dropped without any message.
        # NOTE(review): assumes libpHash signals failure with a negative
        # return value -- confirm against the installed library version.
        if $status < 0 || !$hash {
            say "Could not create DCT hash for: $file";
        } else {
            %dct{$file} = $hash;
        }
    }

    # Compare every unordered pair of hashed images exactly once.
    for %dct.keys.combinations(2).race -> ($ref, $check) {
        my $ref_hash   = %dct{$ref};
        my $check_hash = %dct{$check};

        # Identical hashes need no distance computation.
        if $ref_hash == $check_hash {
            say "Exact match:\n$ref\n$check\n";
            next;
        }

        my $distance = ph_hamming_distance($ref_hash, $check_hash);
        if $distance <= $likely-max {
            say "Likely match ($distance):\n$ref\n$check\n";
        } elsif $distance <= $possible-max {
            say "Possible match ($distance):\n$ref\n$check\n";
        }
    }

    # Elapsed time between program initialisation (INIT phaser) and now.
    say now - INIT now;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment