Skip to content

Instantly share code, notes, and snippets.

@skinkade
Created November 21, 2016 01:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skinkade/a9a0ebf8855aad59edaa3cf86ca0a022 to your computer and use it in GitHub Desktop.
Save skinkade/a9a0ebf8855aad59edaa3cf86ca0a022 to your computer and use it in GitHub Desktop.
pHash utility in Perl6 to detect duplicate/similar images
use NativeCall;
### DCT hashing & Hamming
# Binding for pHash's DCT-based perceptual image hash.
# C prototype: int ph_dct_imagehash(const char* file,ulong64 &hash);
#   1st argument: path of the image file to hash.
#   2nd argument: output parameter; declared `is rw` so the library can
#                 write the computed 64-bit hash into the caller's variable.
# The C function returns `int`, which NativeCall maps to int32 (not int16).
# NOTE(review): pHash signals failure with a negative return value; the
# exact success value should be confirmed against the installed library.
sub ph_dct_imagehash(Str, uint64 is rw)
    is native('pHash', v0)
    returns int32
    { * }
# Binding for pHash's Hamming-distance helper.
# C prototype: int ph_hamming_distance(const ulong64 hash1,const ulong64 hash2);
# Takes two 64-bit hashes by value and returns their distance as a C `int`,
# which NativeCall maps to int32 (not int16).
sub ph_hamming_distance(uint64, uint64)
    is native('pHash', v0)
    returns int32
    { * }
# Entry point.
#
# Takes any number of file and/or directory paths, computes a DCT hash for
# every jpg/jpeg/png file found (directories are walked recursively), then
# compares every pair of hashed files and prints exact, likely and possible
# matches. Finally prints the elapsed run time.
sub MAIN(*@paths) {
    # Lazily walk the work list: plain files with an image extension are
    # yielded, directories have their entries pushed back onto the list.
    my $images := gather while ( @paths ) {
        my $current = @paths.pop;
        take $current if ($current.IO.f && $current.IO.extension.lc eq any <jpg jpeg png>);
        @paths.append(dir($current)>>.path) if $current.IO.d;
    }

    # file path => DCT hash
    my %dct;
    # %dct is filled from a loop over a race sequence; guard the write so the
    # result is correct whether or not the loop body runs concurrently.
    my $dct-lock = Lock.new;

    for $images.race -> $file {
        my uint64 $hash;
        my $status = ph_dct_imagehash($file, $hash);

        # A negative status means the library could not hash the file.
        # NOTE(review): the success value is taken to be any non-negative
        # number; confirm against the installed pHash version.
        # A hash of 0 is also treated as unusable, as the original code did.
        if $status >= 0 && $hash {
            my Int $boxed = $hash;
            $dct-lock.protect({ %dct{$file} = $boxed });
        } else {
            say "Could not create DCT hash for: $file";
        }
    }

    # Hamming-distance cut-offs used to classify a pair of images.
    my $likely-max   = 10;
    my $possible-max = 22;

    # Compare every unordered pair of successfully hashed files.
    for %dct.keys.combinations(2).race {
        my $ref = $_[0];
        my $ref_hash = %dct{$ref};
        my $check = $_[1];
        my $check_hash = %dct{$check};

        # Identical hashes need no distance computation.
        if ($ref_hash == $check_hash) {
            say "Exact match:\n$ref\n$check\n";
            next;
        }

        my $distance = ph_hamming_distance($ref_hash, $check_hash);
        if ($distance <= $likely-max) {
            say "Likely match ($distance):\n$ref\n$check\n";
        } elsif ($distance <= $possible-max) {
            say "Possible match ($distance):\n$ref\n$check\n";
        }
    }

    # Elapsed time since program initialisation.
    say now - INIT now;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment