Last active
March 28, 2017 02:57
-
-
Save wsams/bdd7437423779c620d98a12e2ab21bdf to your computer and use it in GitHub Desktop.
Create a JSON object containing the sha256 hash of each photo in a directory. The main usage is to identify and quarantine or remove duplicates.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Files created in the working directory under hard-coded names: pics.list, error.log
// Runtime flags. The functions below read and write these through $GLOBALS.
// $needskip is set by add2hash() and reset by console(); it must start out false.
$needskip = false;
// Set to false to only report duplicates instead of renaming them into trash/.
$movetotrash = true;

// Configuration: path of the JSON index keyed by sha256 hash.
$hashf = 'hash.json';
// functions | |
/**
 * Return the last component of a slash-separated path.
 *
 * Everything up to and including the final "/" is stripped; a path that
 * contains no "/" comes back unchanged.
 *
 * @param string $picpath Path to a picture, e.g. "./album/img.jpg".
 * @return string The file-name portion, e.g. "img.jpg".
 */
function name($picpath)
{
    $upToLastSlash = "/^.*\//";
    $fileName = preg_replace($upToLastSlash, "", $picpath);

    return $fileName;
}
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * The file is streamed through hash_file() rather than being loaded into
 * memory first, which matters for large photos. An unreadable file raises an
 * exception instead of silently producing the digest of the empty string
 * (which would make every unreadable file look like a duplicate of the others).
 *
 * @param string $picpath Path of the file to hash.
 * @return string 64-character lowercase hexadecimal digest.
 * @throws \RuntimeException When the file is missing, unreadable, or hashing fails.
 */
function sha256($picpath)
{
    if (!is_file($picpath) || !is_readable($picpath)) {
        throw new \RuntimeException("Cannot read {$picpath} for hashing");
    }

    $digest = hash_file('sha256', $picpath);
    if ($digest === false) {
        throw new \RuntimeException("Failed to hash {$picpath}");
    }

    return $digest;
}
/**
 * Load the hash index from disk.
 *
 * A missing, unreadable, or empty index file is treated as an empty index so
 * callers always receive an array. Content that is present but does not
 * decode to an array is reported with an exception rather than being silently
 * replaced, so a corrupt index is never overwritten by accident.
 *
 * @param string $hashf Path of the JSON index file.
 * @return array Decoded index (associative array keyed by hash); may be empty.
 * @throws \RuntimeException When the file has content that is not a JSON object/array.
 */
function readhash($hashf)
{
    $raw = (is_file($hashf) && is_readable($hashf)) ? file_get_contents($hashf) : false;
    if ($raw === false || trim($raw) === '') {
        return [];
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded)) {
        throw new \RuntimeException(
            "{$hashf} does not contain a JSON object (" . json_last_error_msg() . ")"
        );
    }

    return $decoded;
}
/**
 * Add one picture record to the in-memory index, or handle it as a duplicate.
 *
 * - Unknown hash: the record is stored under its hash.
 * - Known hash recorded for the SAME path: this is the file that was indexed
 *   on an earlier run, not a duplicate, so the record is simply refreshed.
 * - Known hash recorded for a DIFFERENT path: the new file is a duplicate. A
 *   message is printed and appended to error.log and, when
 *   $GLOBALS['movetotrash'] is true, the file is renamed into trash/.
 *
 * Side effects: may rename a file, may create trash/, appends to error.log,
 * prints to stdout, and sets $GLOBALS['needskip'] so that console() does not
 * overwrite the printed message.
 *
 * @param array $hashdata Index keyed by hash.
 * @param array $data     Record with at least the keys 'hash' and 'path'.
 * @return array The (possibly updated) index.
 */
function add2hash($hashdata, $data)
{
    $hash = $data['hash'];

    if (!inhash($hash, $hashdata)) {
        $hashdata[$hash] = $data;
        return $hashdata;
    }

    $known = $hashdata[$hash];
    if (isset($known['path']) && $known['path'] === $data['path']) {
        // Same file as the one already indexed (e.g. a re-run against an
        // existing index). Moving it to trash would discard the only copy.
        $hashdata[$hash] = $data;
        return $hashdata;
    }

    $old = $data['path'];
    // Trash name: mtime + random suffix + original path, with every "/" removed
    // so the result is a single flat file name inside trash/.
    $new = "trash/" . str_replace('/', '', filemtime($old) . rand(0, 999) . '/' . $old);

    $error = "{$old} EXISTS AS {$known['path']}. ";
    if ($GLOBALS['movetotrash']) {
        if (!is_dir('trash')) {
            mkdir('trash');
        }
        // Only claim the move happened when rename() actually succeeded.
        if (rename($old, $new)) {
            $error .= "mv {$old} {$new}\n";
        } else {
            $error .= "FAILED mv {$old} {$new}\n";
        }
    } else {
        // Keep the message newline-terminated so the next progress line
        // does not get glued onto it.
        $error .= "\n";
    }

    print($error);
    $GLOBALS['needskip'] = true;
    // TODO: here we need to move the cursor down and not up because these errors should be printed and not hidden
    // http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
    file_put_contents("error.log", "{$error}\n", FILE_APPEND);

    return $hashdata;
}
/**
 * Serialise the index to JSON and write it to disk.
 *
 * If the index cannot be encoded (for example a path containing bytes that
 * are not valid UTF-8), nothing is written: the existing file is left intact
 * instead of being truncated to an empty file, the problem is printed and
 * appended to error.log, and false is returned.
 *
 * @param string     $hashf    Path of the JSON index file.
 * @param array      $hashdata Index keyed by hash.
 * @param array|null $data     Optional extra record (needs a 'hash' key) to
 *                             merge into the index before writing.
 * @return int|false Number of bytes written, or false on failure.
 */
function writehash($hashf, $hashdata, $data = null)
{
    if ($data !== null) {
        $hashdata[$data['hash']] = $data;
    }

    $json = json_encode($hashdata);
    if ($json === false) {
        $error = "Could not encode index for {$hashf}: " . json_last_error_msg() . "\n";
        print($error);
        file_put_contents('error.log', "{$error}\n", FILE_APPEND);
        return false;
    }

    return file_put_contents($hashf, $json);
}
/**
 * Tell whether a hash is already a key of the index.
 *
 * @param string $hash     Hash to look up.
 * @param array  $hashdata Index keyed by hash.
 * @return bool True when $hash is a key of $hashdata (even if its value is null).
 */
function inhash($hash, $hashdata)
{
    $isKnown = array_key_exists($hash, $hashdata);

    return $isKnown;
}
/**
 * Tell whether a picture's content hash is already recorded in the index file.
 *
 * Hashes the picture with sha256() and looks the digest up in the index
 * loaded by readhash().
 *
 * @param string $picpath Path of the picture to check.
 * @param string $hashf   Path of the JSON index file.
 * @return bool True when the picture's hash is a key of the index.
 */
function exists($picpath, $hashf)
{
    // The original called the undefined function sha25(); sha256() is the helper in this file.
    return inhash(sha256($picpath), readhash($hashf));
}
/**
 * Print a one-line progress report.
 *
 * Unless $GLOBALS['needskip'] is set, the previous terminal line is first
 * erased (cursor up one line, clear to end of line) so that successive
 * progress lines overwrite each other. When the flag is set, the previous
 * line is kept and the flag is cleared.
 *
 * The percentage is $curindex relative to the last index ($numpics - 1). When
 * there is no more than one picture, that denominator would be zero, so the
 * percentage is reported as 100 instead of dividing.
 *
 * @param int|string $numpics  Total number of pictures.
 * @param int|string $curindex Zero-based index of the current picture.
 * @param string     $msg      Text appended to the progress line.
 * @return void
 */
function console($numpics, $curindex, $msg)
{
    $lastIndex = intval($numpics) - 1;
    if ($lastIndex > 0) {
        $percent = 100 * round(intval($curindex) / $lastIndex, 2);
    } else {
        // Zero or one picture: avoid division by zero.
        $percent = 100;
    }
    $of = ($curindex + 1) . " of {$numpics}";

    if ($GLOBALS['needskip']) {
        $GLOBALS['needskip'] = false;
    } else {
        system('tput cuu1');
        system('tput el');
    }

    print("{$percent}% finished : {$of} : {$msg}\n");
}
/**
 * Write the list of pictures to hash into pics.list.
 *
 * Runs `find` from the current directory and records every regular file whose
 * name matches *.jpg (case-insensitive), one path per line. The ./trash
 * directory is pruned: it holds files already classified as duplicates, and
 * listing them again would re-hash them and could make a trashed copy
 * displace the original in the index.
 *
 * @return void
 */
function picslist()
{
    shell_exec("find . -path ./trash -prune -o -type f -iname '*.jpg' -print > pics.list");
}
/**
 * Create error.log in the current directory, or truncate it if it exists.
 *
 * @return void
 */
function initerrorlog()
{
    $logPath = 'error.log';
    $noContent = '';

    file_put_contents($logPath, $noContent);
}
/**
 * Reset the index file to an empty JSON object.
 *
 * @param string $hashf Path of the JSON index file; created or overwritten.
 * @return void
 */
function inithash($hashf)
{
    $emptyIndex = '{}';

    file_put_contents($hashf, $emptyIndex);
}
/**
 * Delete the files this script generates: error.log and the hash index.
 *
 * The original body referenced $hashf, which is not defined inside the
 * function, so the index was never removed. The path is now an optional
 * parameter; when omitted it falls back to the script-level $hashf via
 * $GLOBALS, so existing zero-argument calls keep working. Files that are
 * already gone are skipped.
 *
 * @param string|null $hashf Path of the JSON index file, or null to use $GLOBALS['hashf'].
 * @return void
 */
function cleanup($hashf = null)
{
    if ($hashf === null && isset($GLOBALS['hashf'])) {
        $hashf = $GLOBALS['hashf'];
    }

    $targets = ['error.log'];
    if (is_string($hashf) && $hashf !== '') {
        $targets[] = $hashf;
    }

    foreach ($targets as $path) {
        if (is_file($path)) {
            unlink($path);
        }
    }
}
/**
 * Hash every picture listed in pics.list and persist the updated index.
 *
 * For each listed path a progress line is printed, the file is hashed, and
 * the record (hash, name, path, size) is passed to add2hash(), which either
 * stores it or handles it as a duplicate. The index is written once, after
 * the loop.
 *
 * Robustness:
 * - If pics.list cannot be read, the problem is reported and nothing is hashed.
 * - A listed path that is not a readable regular file is reported and
 *   skipped. Otherwise every such file would yield the same empty-content
 *   hash and be flagged as a duplicate of the others.
 *
 * @param string $hashf Path of the JSON index file.
 * @return void
 */
function writeall($hashf)
{
    $pics = is_readable('pics.list')
        ? file('pics.list', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
        : false;
    if ($pics === false) {
        $error = "pics.list could not be read; nothing hashed.\n";
        print($error);
        file_put_contents('error.log', "{$error}\n", FILE_APPEND);
        return;
    }

    $hashdata = readhash($hashf);
    $numpics = count($pics);

    foreach ($pics as $k => $picpath) {
        console($numpics, $k, $picpath);

        if (!is_file($picpath) || !is_readable($picpath)) {
            $error = "{$picpath} UNREADABLE, skipped.\n";
            print($error);
            // Keep console() from erasing the message on its next call.
            $GLOBALS['needskip'] = true;
            file_put_contents('error.log', "{$error}\n", FILE_APPEND);
            continue;
        }

        // All fields are evaluated before add2hash() runs, i.e. before the
        // file can be renamed into trash/.
        $hashdata = add2hash($hashdata, [
            'hash' => sha256($picpath),
            'name' => name($picpath),
            'path' => $picpath,
            'size' => filesize($picpath),
        ]);
    }

    writehash($hashf, $hashdata);
}
/**
 * Make sure a "trash" entry exists in the current directory.
 *
 * Nothing happens when something named "trash" is already present;
 * otherwise the directory is created.
 *
 * @return void
 */
function initdirs()
{
    $trashDir = "trash";

    if (file_exists($trashDir)) {
        return;
    }

    mkdir($trashDir);
}
// actions | |
// Ask which action to run. readline() returns false on EOF, and typed input
// may carry surrounding whitespace, so normalise before comparing.
$action = readline("Enter action: <init|clean|add2hash> ");
$action = ($action === false) ? '' : trim($action);

if ($action === 'init') {
    // Fresh start: empty error log, empty index, trash directory present.
    initerrorlog();
    inithash($hashf);
    initdirs();
    exit();
} elseif ($action === 'clean') {
    // Pass the index path explicitly; cleanup() has no local $hashf of its own.
    cleanup($hashf);
    exit();
} elseif ($action === 'add2hash') {
    picslist();
    if (!file_exists($hashf)) {
        inithash($hashf);
    }
    initerrorlog();
    if ($movetotrash) {
        // Duplicates are renamed into trash/, so it must exist even when
        // "init" was never run.
        initdirs();
    }
    writeall($hashf);
    exit();
} else {
    // Previously an unrecognised action ended the script without any feedback.
    print("Unknown action '{$action}'. Expected one of: init, clean, add2hash\n");
    exit(1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment