Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
<?php
/**
 * Dry-run duplicate finder for a single directory (non-recursive).
 *
 * Usage: deduplicate.php "Dropbox/Camera Uploads/2014" [trash-dir]
 *
 * Every regular, non-hidden file directly inside the given directory is
 * hashed with MD5. When two files share a hash, the one whose *filename*
 * contains an underscore is reported as the copy to move to the trash
 * directory. If neither name contains an underscore, nothing is reported
 * for that pair.
 *
 * The script only prints the "mv" it would perform; the rename() calls are
 * intentionally left commented out. Uncomment them to actually move files.
 *
 * Arguments:
 *   $argv[1]  directory to scan (required)
 *   $argv[2]  trash directory (optional, defaults to /Users/boy/.Trash/)
 *
 * Exit status: 1 on usage / unreadable-directory errors, 0 otherwise.
 */
if (!isset($argv[1])) {
    // Errors go to STDERR with a non-zero status so callers can detect them.
    fwrite(STDERR, 'Usage: deduplicate.php "Dropbox/Camera Uploads/2014"' . PHP_EOL);
    exit(1);
}

$path = $argv[1];
if (!is_dir($path)) {
    fwrite(STDERR, "$path is not a dir!" . PHP_EOL);
    exit(1);
}

// The default keeps the historical hard-coded location, so output is
// unchanged unless a second argument is supplied.
$trashDir = isset($argv[2])
    ? rtrim($argv[2], '/\\') . DIRECTORY_SEPARATOR
    : '/Users/boy/.Trash/';

$files = scandir($path);
if ($files === false) {
    fwrite(STDERR, "Could not read $path" . PHP_EOL);
    exit(1);
}

// Maps MD5 hash => filename (relative to $path) of the copy being kept.
$hashes = [];
$fileCount = count($files);

foreach ($files as $i => $file) {
    // Skip ".", ".." and hidden files.
    if (substr($file, 0, 1) === '.') {
        continue;
    }

    if ($i % 100 === 0) {
        echo "MD5 $i / $fileCount" . PHP_EOL;
    }

    $filePath = $path . DIRECTORY_SEPARATOR . $file;

    // Subdirectories cannot be hashed: md5_file() would return false for
    // each of them, and false collapses to array key 0, which would make
    // every directory look like a duplicate of the previous one.
    if (!is_file($filePath)) {
        continue;
    }

    $md5 = md5_file($filePath);
    if ($md5 === false) {
        fwrite(STDERR, "Could not hash $filePath, skipping" . PHP_EOL);
        continue;
    }

    if (!isset($hashes[$md5])) {
        $hashes[$md5] = $file;
        continue;
    }

    $keptName = $hashes[$md5];
    $file1 = $path . DIRECTORY_SEPARATOR . $keptName;
    $file2 = $filePath;

    // Test the bare filenames, not the full paths: an underscore in the
    // directory name must not influence which copy is discarded.
    if (strpos($keptName, '_') !== false) {
        echo "mv $file1 {$trashDir}, duplicate of $file2" . PHP_EOL;
        //rename($file1, $trashDir . $keptName);

        // The previously remembered file is the one going away, so later
        // duplicates must be compared against the surviving copy.
        $hashes[$md5] = $file;
    } elseif (strpos($file, '_') !== false) {
        echo "mv $file2 {$trashDir}, duplicate of $file1" . PHP_EOL;
        //rename($file2, $trashDir . $file);
    }
}

echo "DONE HASHING" . PHP_EOL;
//foreach ($files as $file) {
// $p = [];
// if (preg_match('/(?P<Y>\d{4})(?P<m>\d{2})(?P<d>\d{2})_(?P<H>\d{2})(?P<i>\d{2})(?P<s>\d{2})\.(?P<ext>\w+)/', $file, $p) < 1) {
// continue;
// }
//
// $wrongFilePattern = "/IMG-{$p['Y']}{$p['m']}{$p['d']}-WA\\d+.{$p['ext']}/";
//
// $wrongFile = false;
// foreach ($files as $file1) {
// if (preg_match($wrongFilePattern, $file1)) {
// $wrongFile = $file1;
// }
// }
// if (!$wrongFile) {
// continue;
// }
//
// $rightFilePath = $path . DIRECTORY_SEPARATOR . $file;
// $wrongFilePath = $path . DIRECTORY_SEPARATOR . $wrongFile;
//
// if (sha1_file($rightFilePath) !== sha1_file($wrongFilePath)) {
// continue;
// }
//
// echo "mv $wrongFilePath /Users/boy/.Trash/" . PHP_EOL;
// //rename($wrongFilePath, '/Users/boy/.Trash/' . $wrongFile);
//}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment