Skip to content

Instantly share code, notes, and snippets.

@wsams
Last active March 28, 2017 02:57
Show Gist options
  • Save wsams/bdd7437423779c620d98a12e2ab21bdf to your computer and use it in GitHub Desktop.
Save wsams/bdd7437423779c620d98a12e2ab21bdf to your computer and use it in GitHub Desktop.
Creates a JSON file that maps the SHA-256 hash of each photo in a directory to that photo's metadata. Its main use is to identify duplicate photos so they can be quarantined or removed.
<?php
// Files with hardcoded names that this script creates: pics.list, error.log

// Configuration: the JSON file that stores the hash data.
$hashf = 'hash.json';

// Global flags; the functions below access them through $GLOBALS.
// You can change this: when true, detected duplicates are moved into trash/.
$movetotrash = true;
// Internal state flag; it must be left as false here.
$needskip = false;
// functions
/**
 * Return the file-name portion of a picture path.
 *
 * Everything up to and including the last "/" is stripped; a path that
 * contains no "/" is returned unchanged.
 *
 * @param string $picpath Path to a picture.
 * @return string The path without its leading directory part.
 */
function name($picpath)
{
    $leadingdirs = "/^.*\//";
    $filename = preg_replace($leadingdirs, "", $picpath);

    return $filename;
}
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * hash_file() streams the file instead of loading the whole photo into
 * memory first. It also reports a failed read as false rather than
 * silently producing the digest of an empty string.
 *
 * @param string $picpath Path to the file to hash.
 * @return string|false Lowercase hexadecimal digest, or false when the file
 *                      cannot be read.
 */
function sha256($picpath)
{
    return hash_file('sha256', $picpath);
}
/**
 * Load the hash store from disk.
 *
 * A missing, unreadable or malformed file yields an empty array, so callers
 * can always treat the result as an array keyed by hash.
 *
 * @param string $hashf Path to the JSON hash file.
 * @return array Decoded hash data (hash => entry), possibly empty.
 */
function readhash($hashf)
{
    if (!is_file($hashf) || !is_readable($hashf)) {
        return array();
    }

    $raw = file_get_contents($hashf);
    if ($raw === false) {
        return array();
    }

    // json_decode() returns null for invalid JSON and scalars for scalar
    // documents; neither is usable as a hash store.
    $decoded = json_decode($raw, true);

    return is_array($decoded) ? $decoded : array();
}
/**
 * Register a picture in the hash data, or report it as a duplicate.
 *
 * A picture is a duplicate only when its hash is already recorded for a
 * DIFFERENT path that still exists on disk. An entry recorded for the same
 * path (the same file seen on an earlier run) or for a file that no longer
 * exists is simply refreshed. Without this check, re-running the script
 * against an existing hash file would move every photo into trash/.
 *
 * For a duplicate, a message is printed and appended to error.log, and
 * $GLOBALS['needskip'] is set to true. When $GLOBALS['movetotrash'] is
 * truthy the duplicate is also moved into trash/.
 *
 * @param array $hashdata Current hash data, keyed by hash.
 * @param array $data     Entry to add; must contain 'hash' and 'path'.
 * @return array The hash data, updated unless $data was a duplicate.
 */
function add2hash($hashdata, $data)
{
    $hash = $data['hash'];
    $path = $data['path'];

    $knownpath = null;
    if (is_array($hashdata) && array_key_exists($hash, $hashdata) && isset($hashdata[$hash]['path'])) {
        $knownpath = $hashdata[$hash]['path'];
    }

    // Not a duplicate: unknown hash, the very same file, or the recorded
    // original has disappeared.
    if ($knownpath === null || $knownpath === $path || !file_exists($knownpath)) {
        $hashdata[$hash] = $data;
        return $hashdata;
    }

    $error = "{$path} EXISTS AS {$knownpath}. ";
    if ($GLOBALS['movetotrash']) {
        // Flatten the path into a single file name, prefixed with the
        // modification time and a random number.
        $new = "trash/" . str_replace('/', '', filemtime($path) . rand(0, 999) . '/' . $path);
        // The trash directory is only created by the "init" action, so make
        // sure it exists before moving anything into it.
        if (!is_dir('trash')) {
            mkdir('trash');
        }
        $error .= "mv {$path} {$new}\n";
        if (!rename($path, $new)) {
            $error .= "MOVE FAILED: {$path} was left in place\n";
        }
    }
    print($error);
    // Tell console() not to overwrite the message that was just printed.
    $GLOBALS['needskip'] = true;
    // TODO: here we need to move the cursor down and not up because these errors should be printed and not hidden
    // http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
    file_put_contents("error.log", "{$error}\n", FILE_APPEND);

    return $hashdata;
}
/**
 * Serialise the hash data as JSON and write it to disk.
 *
 * @param string     $hashf    Path of the JSON file to (over)write.
 * @param array      $hashdata Hash data to store.
 * @param array|null $data     Optional extra entry; when given it is stored
 *                             under its own 'hash' key before writing.
 * @return int|false Result of file_put_contents().
 */
function writehash($hashf, $hashdata, $data=null)
{
    if ($data !== null) {
        $key = $data['hash'];
        $hashdata[$key] = $data;
    }

    $json = json_encode($hashdata);

    return file_put_contents($hashf, $json);
}
/**
 * Tell whether a hash is already a key of the hash data.
 *
 * @param string $hash     Hash to look up.
 * @param array  $hashdata Hash data, keyed by hash.
 * @return bool True when $hash is a key of $hashdata.
 */
function inhash($hash, $hashdata)
{
    $present = array_key_exists($hash, $hashdata);

    return $present;
}
/**
 * Tell whether a picture's content hash is already stored in the hash file.
 *
 * @param string $picpath Path to the picture to check.
 * @param string $hashf   Path to the JSON hash file.
 * @return bool True when the picture's SHA-256 is a key of the stored data.
 */
function exists($picpath, $hashf)
{
    // The original called the undefined function sha25(), a typo for sha256().
    $hash = sha256($picpath);
    if (!is_string($hash)) {
        return false;
    }

    $hashdata = readhash($hashf);
    if (!is_array($hashdata)) {
        return false;
    }

    return inhash($hash, $hashdata);
}
/**
 * Print a one-line progress report.
 *
 * Unless $GLOBALS['needskip'] is set, the previous terminal line is first
 * overwritten (cursor up one line, clear to end of line) so the progress
 * line updates in place. When needskip is set, the flag is cleared and the
 * previous line is left untouched.
 *
 * @param int    $numpics  Total number of pictures.
 * @param int    $curindex Zero-based index of the current picture.
 * @param string $msg      Text appended to the progress line.
 * @return void
 */
function console($numpics, $curindex, $msg)
{
    // Progress runs from 0% at the first picture to 100% at the last one.
    // With a single picture there is no span to divide by, so report 100%
    // instead of dividing by zero.
    $lastindex = intval($numpics) - 1;
    if ($lastindex > 0) {
        $percent = 100 * round(intval($curindex) / $lastindex, 2);
    } else {
        $percent = 100;
    }

    $of = ($curindex + 1) . " of {$numpics}";
    if ($GLOBALS['needskip']) {
        $GLOBALS['needskip'] = false;
    } else {
        system('tput cuu1');
        system('tput el');
    }
    print("{$percent}% finished : {$of} : {$msg}\n");
}
/**
 * Write the paths of all *.jpg files (case-insensitive) found below the
 * current directory to pics.list, one per line.
 *
 * The ./trash directory is pruned so that already-quarantined duplicates
 * are not scanned again, and directories whose name happens to end in .jpg
 * are skipped because they cannot be hashed.
 *
 * @return void
 */
function picslist()
{
    shell_exec("find . -path ./trash -prune -o ! -type d -iname '*.jpg' -print > pics.list");
}
/**
 * Create error.log in the current directory, or truncate it if it exists.
 *
 * @return void
 */
function initerrorlog()
{
    $logfile = 'error.log';
    $nothing = '';

    file_put_contents($logfile, $nothing);
}
/**
 * Reset the hash file so that it contains an empty JSON object.
 *
 * @param string $hashf Path of the JSON hash file to (over)write.
 * @return void
 */
function inithash($hashf)
{
    $emptystore = '{}';

    file_put_contents($hashf, $emptystore);
}
/**
 * Delete the files created by this script: error.log and the hash file.
 *
 * The original body referenced $hashf, which is not visible inside a
 * function, so the hash file was never removed. The path is now an optional
 * argument that falls back to the global $hashf setting, and then to
 * 'hash.json'.
 *
 * @param string|null $hashf Path of the hash file; null uses the global
 *                           configuration.
 * @return void
 */
function cleanup($hashf = null)
{
    if ($hashf === null) {
        $hashf = isset($GLOBALS['hashf']) ? $GLOBALS['hashf'] : 'hash.json';
    }

    foreach (array('error.log', $hashf) as $file) {
        // Only unlink what is actually there, so a repeated clean is quiet.
        if (is_file($file)) {
            unlink($file);
        }
    }
}
/**
 * Hash every picture listed in pics.list and persist the result.
 *
 * Each listed path is hashed and passed to add2hash(), which either records
 * it or reports it as a duplicate. Entries that are not readable regular
 * files are reported and skipped; hashing them would otherwise treat them
 * as empty content and make them look like duplicates of each other. The
 * updated hash data is written back to $hashf at the end.
 *
 * @param string $hashf Path of the JSON hash file to read and rewrite.
 * @return void
 */
function writeall($hashf)
{
    $pics = file('pics.list', FILE_IGNORE_NEW_LINES);
    if ($pics === false) {
        // No list could be read: nothing to hash.
        $pics = array();
    }

    $hashdata = readhash($hashf);
    if (!is_array($hashdata)) {
        // An unreadable or corrupt hash file starts an empty store.
        $hashdata = array();
    }

    $numpics = count($pics);
    foreach ($pics as $k => $picpath) {
        console($numpics, $k, $picpath);

        $hash = (is_file($picpath) && is_readable($picpath)) ? sha256($picpath) : false;
        if (!is_string($hash)) {
            $error = "{$picpath} IS NOT A READABLE FILE. skipped\n";
            print($error);
            // Keep console() from overwriting the message just printed.
            $GLOBALS['needskip'] = true;
            file_put_contents("error.log", "{$error}\n", FILE_APPEND);
            continue;
        }

        $hashdata = add2hash($hashdata, array(
            "hash" => $hash,
            "name" => name($picpath),
            "path" => $picpath,
            "size" => filesize($picpath)));
    }

    writehash($hashf, $hashdata);
}
/**
 * Make sure a "trash" entry exists in the current directory, creating the
 * directory when nothing by that name is present.
 *
 * @return void
 */
function initdirs()
{
    $trashdir = "trash";

    if (file_exists($trashdir)) {
        return;
    }

    mkdir($trashdir);
}
// actions
// Ask which step to run. readline() returns false on end of input; the cast
// and trim() turn that, and stray whitespace, into a plain string.
$action = trim((string) readline("Enter action: <init|clean|add2hash> "));
if ($action === 'init') {
    // Start from scratch: empty log, empty hash store, trash directory.
    initerrorlog();
    inithash($hashf);
    initdirs();
    exit();
} else if ($action === 'clean') {
    cleanup();
    exit();
} else if ($action === 'add2hash') {
    picslist();
    if (!file_exists($hashf)) {
        inithash($hashf);
    }
    initerrorlog();
    // Duplicates are moved into trash/, so it must exist even when the
    // "init" action was never run.
    initdirs();
    writeall($hashf);
    exit();
} else {
    // Previously an unrecognised action ended the script without any output.
    fwrite(STDERR, "Unknown action '{$action}'. Expected one of: init, clean, add2hash\n");
    exit(1);
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment