Skip to content

Instantly share code, notes, and snippets.

@nhalstead
Last active June 19, 2021 21:08
Show Gist options
  • Save nhalstead/60610a9e90263ccad38432a2fa02f77f to your computer and use it in GitHub Desktop.
Save nhalstead/60610a9e90263ccad38432a2fa02f77f to your computer and use it in GitHub Desktop.
Used to cleanup duplicate files. (by filename local to the folder)
#!/usr/bin/php
<?php
/**
* 1. Find all files that have the common symbols for copied files " (1)."
* 2. Sort list by the largest file size
* 3. Delete all other matches (and original filename without the number indication)
* 4. Rename the largest to match the original name
*
* This program reads the list of files to inspect from a plain-text index
* named index.txt in the current working directory (one path per line).
* Run the following to generate that index:
*
* find "$PWD" -type f > index.txt
*
* Glob was not used since the normal find command can speed through it and allow for testing.
*
*/
// Collapse "name (N).ext" copies onto "name.ext", keeping the largest file.
//
// Reads the path list from index.txt, groups every copy with the original
// name it would have IN THE SAME DIRECTORY, keeps the largest member of each
// group, deletes the rest and renames the survivor to the original name.
echo "Finding Files" . PHP_EOL;

// Index produced by: find "$PWD" -type f > index.txt
$indexFile = 'index.txt';
// Marker appended to copied files, e.g. "photo (1).jpg".
$copyMarker = '/ \(\d+\)\./';

// file() returns false on failure; stop before anything destructive runs.
$all = is_readable($indexFile)
    ? file($indexFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;
if ($all === false) {
    fwrite(STDERR, "Unable to read " . $indexFile . PHP_EOL);
    exit(1);
}

// Map: full path of the original name => set of member paths (path => true).
// Keying by the FULL path keeps same-named files in different folders apart,
// and using paths as keys makes repeated index lines harmless.
$groups = [];
foreach ($all as $fname) {
    // Only the file name may carry the marker; directory names are left alone.
    $base = basename($fname);
    if ($base === '' || substr($fname, -strlen($base)) !== $base) {
        continue;
    }
    if (preg_match($copyMarker, $base) !== 1) {
        continue;
    }

    // Rebuild the path from the untouched directory prefix + cleaned name.
    $dirPrefix = (string) substr($fname, 0, strlen($fname) - strlen($base));
    $target = $dirPrefix . preg_replace($copyMarker, '.', $base);

    if (!isset($groups[$target])) {
        // The original name is listed first so it wins size ties below.
        $groups[$target] = [$target => true];
    }
    $groups[$target][$fname] = true;
}

$failures = 0;

// Resolve each group: keep the largest file, remove the others, then rename.
foreach ($groups as $target => $members) {
    $target = (string) $target;

    // Earlier groups may have removed or renamed files this group refers to.
    clearstatcache();

    // Collect the size of every member that still exists.
    $sizes = [];
    $unreadable = false;
    foreach (array_keys($members) as $path) {
        $path = (string) $path;
        if (!is_file($path)) {
            continue;
        }
        $size = filesize($path);
        if ($size === false) {
            $unreadable = true;
            break;
        }
        $sizes[$path] = $size;
    }

    // Never delete anything when a size could not be determined.
    if ($unreadable) {
        fwrite(STDERR, "Skip (size unavailable) " . $target . PHP_EOL);
        $failures++;
        continue;
    }

    // Nothing to compare against: leave a lone file exactly as it is.
    if (count($sizes) < 2) {
        echo "Skip" . PHP_EOL;
        continue;
    }

    // Pick the largest; on equal sizes the earliest entry (original first) wins.
    $keep = null;
    foreach ($sizes as $path => $size) {
        if ($keep === null || $size > $sizes[$keep]) {
            $keep = (string) $path;
        }
    }

    // Delete every other member of the group.
    foreach (array_keys($sizes) as $path) {
        $path = (string) $path;
        if ($path === $keep) {
            continue;
        }
        echo "Remove " . $path . PHP_EOL;
        if (!unlink($path)) {
            fwrite(STDERR, "Failed to remove " . $path . PHP_EOL);
            $failures++;
        }
    }

    // Give the survivor the original name, unless it already has it.
    if ($keep !== $target) {
        echo "Rename " . $keep . " -> " . $target . PHP_EOL;
        if (!rename($keep, $target)) {
            fwrite(STDERR, "Failed to rename " . $keep . PHP_EOL);
            $failures++;
        }
    }

    echo PHP_EOL;
    echo PHP_EOL;
}

echo "Done" . PHP_EOL;

// Report problems to the calling shell.
if ($failures > 0) {
    exit(1);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment