Skip to content

Instantly share code, notes, and snippets.

@vijinho
Created December 25, 2019 02:48
Show Gist options
  • Save vijinho/09f7b542d2671bf9b67db3ef58e6767d to your computer and use it in GitHub Desktop.
Save vijinho/09f7b542d2671bf9b67db3ef58e6767d to your computer and use it in GitHub Desktop.
Script that lists files and folders whose names can be transliterated to non-accented ASCII characters, and optionally renames them. The rename feature was tested successfully on a filesystem of over 65,000 files - USE AT YOUR OWN RISK!
#!/usr/bin/php
<?php
// show and optionally rename files and dirs to transliterated unaccented ascii from current working folder
// 2019-12-24 Vijay Mahrra vijay@yoyo.org
// code is public domain
// Feature switches. Nothing is renamed unless RENAME and the matching RENAME_FILES / RENAME_DIRS flag are all true.
define('DEBUG', true); // print diagnostic output (include/ignore lists, tmp file handling, paths being checked)
define('VERBOSE', true); // print per-path details (skipped paths, planned transliterations, rename failures)
define('RENAME', false); // allow renaming
define('RENAME_FILES', false); // rename files (only has an effect when RENAME is also true)
define('RENAME_DIRS', false); // rename directories (only has an effect when RENAME is also true)
define('PRESERVE_TIME', true); // preserve earliest file time from m/c/atime
/**
 * Write a timestamped message to STDOUT, optionally followed by an indented detail line.
 *
 * Arrays passed for either argument are rendered with print_r() before printing.
 *
 * @param mixed  $message     Main text (or array) printed after the timestamp.
 * @param mixed  $str         Optional detail (or array); not printed when empty().
 * @param string $date_format date() format used for the timestamp prefix.
 */
function output($message, $str = '', $date_format = 'Y-m-d h:i:s') {
    $text = is_array($message) ? print_r($message, true) : $message;
    $detail = is_array($str) ? print_r($str, true) : $str;

    // Each entry starts on a new line: timestamp, tab, message.
    fwrite(STDOUT, "\n" . date($date_format) . "\t" . $text);
    if (!empty($detail)) {
        fwrite(STDOUT, "\n\t" . $detail);
    }
    flush();
}
/**
 * Return the earliest of a path's access, modification and inode-change times.
 *
 * @param string $path File or directory to inspect.
 * @return int|false Smallest of fileatime(), filemtime() and filectime(),
 *                   or false when the path does not exist.
 */
function filetime_min($path) {
    if (file_exists($path)) {
        $times = [fileatime($path), filemtime($path), filectime($path)];
        return min($times);
    }
    return false;
}
// ONLY paths containing at least one of these substrings are processed.
$include_paths = ['/data/', '/media/', '/Users/', '/home/'];
// Paths containing any of these substrings are IGNORED, even if they matched an include string.
$ignore_paths = ['.debris', '/backup/'];
if (DEBUG) {
    // Show the active filters before any path is examined.
    $filters = ["Include Path: " => $include_paths, "Ignore Path: " => $ignore_paths];
    foreach ($filters as $label => $list) {
        output($label, $list);
    }
}
// To process a pre-generated list (one path per line) instead of scanning, set $tmpfile here,
// e.g. after running: find . -type f > files.txt
//$tmpfile = 'files.txt';

// Start the processed-line counter before any early exit so the final summary can always read it.
$i = 0;
if (!empty($tmpfile) && !file_exists($tmpfile)) {
    output("File to process does not exist: $tmpfile");
    goto end;
}
// generate a tmp file to be deleted after script ends
if (empty($tmpfile)) {
    // get a temporary filename to process all files in subfolders
    $tmpfile = tempnam(sys_get_temp_dir(), 'php_transliterate_');
    if ($tmpfile === false) {
        output("Could not create tmp file");
        goto end;
    }
    // Only mark the file for deletion once it really exists.
    $delete_file = true;
    $resolved = realpath($tmpfile);
    if ($resolved !== false) {
        $tmpfile = $resolved;
    }
    if (DEBUG) {
        output("Created tmpfile: $tmpfile");
    }
    // POSIX single-quote escaping done by hand: paths with spaces or shell metacharacters must not
    // break (or inject into) the command. escapeshellarg() is deliberately avoided because it can
    // drop non-ASCII bytes depending on the active LC_CTYPE locale, and accented paths are exactly
    // what this script is meant to handle.
    $shell_quote = function ($arg) {
        return "'" . str_replace("'", "'\\''", (string) $arg) . "'";
    };
    $cmd = 'find ' . $shell_quote(getcwd()) . ' -type f > ' . $shell_quote($tmpfile);
    if (DEBUG) {
        output("Generating files list: $cmd");
    }
    system($cmd);
    // The file was just rewritten by an external process; make sure filesize() is not served from cache.
    clearstatcache(true, $tmpfile);
    if (0 == filesize($tmpfile)) {
        if (DEBUG) {
            output("Could not generate tmp file: $tmpfile\n");
        }
        // Stop regardless of DEBUG, and remove the empty tmp file here so the early exit cannot leak it.
        unlink($tmpfile);
        $delete_file = false;
        goto end;
    }
}
// dirs and files to be renamed
$rename_dirs = [];  // transliterated directory path => original directory path
$rename_files = []; // original file path => same directory + transliterated file name
$h = fopen($tmpfile, 'r'); // process line by line to avoid memory issues
if ($h === false) {
    output("Could not open file list: $tmpfile");
    // Remove a generated list here, then clear the flag so it is never deleted twice.
    if (!empty($delete_file) && file_exists($tmpfile)) {
        unlink($tmpfile);
    }
    $delete_file = false;
    goto end;
}
// NOTE(review): with glibc, //TRANSLIT output appears to depend on the LC_CTYPE locale; if accented
// characters come out as "?", confirm a UTF-8 locale is active (setlocale) before running.
while (($path = fgets($h)) !== false) {
    $i++;
    // Strip only the line terminator: trim() would also eat leading/trailing spaces that are
    // a legitimate part of a file name, producing a path that does not exist.
    $path = rtrim($path, "\r\n");
    // check if the path should be ignored or not
    $path_ok = false;
    foreach ($include_paths as $str) {
        if (!empty($str) && strpos($path, $str) !== false) {
            $path_ok = true;
            break;
        }
    }
    // An ignore match always wins over an include match.
    foreach ($ignore_paths as $str) {
        if (!empty($str) && strpos($path, $str) !== false) {
            $path_ok = false;
            break;
        }
    }
    if (!$path_ok) {
        if (VERBOSE) {
            output("Skipping:", $path);
        }
        continue;
    }
    if (DEBUG && VERBOSE) {
        output("Checking:", $path);
    }
    // get the current dir and filename
    $filename = basename($path);
    $dir = substr($path, 0, strlen($path) - strlen($filename) - 1);
    // check the dir has characters that can be transliterated, if so, add to list
    $newdir = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $dir);
    if (!empty($newdir) && $newdir !== $dir && !array_key_exists($newdir, $rename_dirs)) {
        if (VERBOSE) {
            output("Will transliterate directory: $dir");
        }
        // $newdir is already pure ASCII (a subset of UTF-8), so it is used as the key directly;
        // converting it back through iconv was a no-op that could turn the key into false on failure.
        $rename_dirs[$newdir] = $dir;
    }
    // if the filename can be transliterated, add to list
    $newfilename = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $filename);
    if (!empty($newfilename) && $newfilename !== $filename) {
        // A transliteration that introduces "/" (presumably possible for characters such as
        // fractions - TODO confirm per iconv implementation) would point into a different directory.
        if (strpos($newfilename, '/') !== false) {
            if (VERBOSE) {
                output("Skipping file, transliterated name contains '/': $filename");
            }
            continue;
        }
        if (VERBOSE) {
            output("Will transliterate file: $filename");
        }
        $rename_files[$path] = $dir . '/' . $newfilename;
    }
}
fclose($h);
// list files normalized: print original path, original name and new name; rename when enabled
echo "\n";
foreach ($rename_files as $path => $newpath) {
    printf("\n%s\n%s\t\n%s\t\n", $path, basename($path), basename($newpath));
    if (!(RENAME && RENAME_FILES)) {
        continue;
    }
    // rename() silently replaces an existing file, so never rename onto a name that is already taken
    // (e.g. "café.txt" next to an existing "cafe.txt").
    if (file_exists($newpath)) {
        if (VERBOSE) {
            output("Could not rename file, target already exists!\n\t$path\n\t$newpath");
        }
        continue;
    }
    // Capture the earliest timestamp before the rename touches the file.
    $ts = filetime_min($path);
    if (!rename($path, $newpath)) {
        if (VERBOSE) {
            output("Could not rename file!\n\t$path\n\t$newpath");
        }
    } else if (PRESERVE_TIME && $ts !== false && file_exists($newpath)) {
        // $ts is false when the times could not be read; passing that to touch() would reset the file to the epoch.
        touch($newpath, $ts, $ts);
    }
}
echo "\n";
// Expand every directory mapping into single-component renames, parents before children.
// Key: the directory's path at the time of renaming (parents already renamed); value: its new path.
$renames = [];
foreach ($rename_dirs as $dest => $src) {
    $src_parts = explode('/', (string) $src);
    $dest_parts = explode('/', (string) $dest);
    // Components are compared position by position, which is only meaningful when transliteration
    // did not add or remove a "/" in the path.
    if (count($src_parts) !== count($dest_parts)) {
        output("Skipping directory, transliteration changed its depth:", $src);
        continue;
    }
    // A dedicated loop variable is used here: reusing $i would overwrite the processed-files counter.
    foreach ($src_parts as $depth => $part) {
        if ($part !== $dest_parts[$depth]) {
            // Index 0 is the empty string before the leading "/", hence the slice starting at 1.
            $a = '/' . join('/', array_slice($src_parts, 1, $depth));
            $b = '/' . join('/', array_slice($dest_parts, 1, $depth));
            // Record the component as renamed so deeper components are addressed via the new parent path.
            $src_parts[$depth] = $dest_parts[$depth];
            $renames[$a] = $b;
        }
    }
}
// Print every planned directory rename (current path, new path) and apply it when enabled.
foreach ($renames as $path => $newpath) {
    printf("\n%s\t\n%s\t\n", $path, $newpath);
    // is_dir() is false for missing paths too, e.g. when a parent rename was skipped or failed.
    if (!(RENAME && RENAME_DIRS) || !is_dir($path)) {
        continue;
    }
    // Never rename onto an existing path: it would replace an empty directory or fail on a non-empty one.
    if (file_exists($newpath)) {
        if (VERBOSE) {
            output("Could not rename directory, target already exists!\n\t$path\n\t$newpath");
        }
        continue;
    }
    // Capture the earliest timestamp before the rename.
    $ts = filetime_min($path);
    if (!rename($path, $newpath)) {
        if (VERBOSE) {
            output("Could not rename file!\n\t$path\n\t$newpath");
        }
    } else if (PRESERVE_TIME && $ts !== false && file_exists($newpath)) {
        // Skip touch() when the times could not be read; a false timestamp would be applied as the epoch.
        touch($newpath, $ts, $ts);
    }
}
// Cleanup and summary. The "end" label sits before the cleanup so that every early exit
// (goto end) also removes a generated tmp file instead of leaking it.
end:
// if tmp file was created
if (!empty($delete_file) && !empty($tmpfile) && file_exists($tmpfile)) {
    if (DEBUG) {
        output("Deleting tmp file: " . $tmpfile);
    }
    unlink($tmpfile);
}
// $i may be unset when the script bailed out before the counter was initialised.
output("Processed files: " . (isset($i) ? (int) $i : 0));
if (!empty($rename_files)) {
    output("Transliterated files: " . count($rename_files));
}
if (!empty($rename_dirs)) {
    output("Transliterated dirs: " . count($rename_dirs));
}
echo "\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment