Created
May 23, 2017 23:36
-
-
Save ywarnier/4ca99d5a8e574b15419ae15ad50bdd31 to your computer and use it in GitHub Desktop.
Script to convert files in a Gallery2 database and directories to avoid spaces, so they can be imported in Piwigo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* This script converts files in a Gallery2 database and directories structure | |
* to avoid spaces, so that they can be imported in Piwigo (which doesn't tolerate spaces) | |
* It gives a substantial amount of output, so we recommend executing with | |
* php g2-convert.php > output.txt | |
* to be able to analyse it afterwards. | |
* This script has been used successfully on a database of 249K pictures | |
* @author @ywarnier @BeezNest | |
* @date 2017 | |
* @license GNU/LGPL | |
*/ | |
// config.php should contain: | |
// $db = ['host' => 'localhost', 'db' => 'dbname', 'user' => 'user', 'pass' => 'password']; | |
// $dir = 'the-full-path-to-the-g2data/albums/'; | |
require_once __DIR__.'/config.php'; | |
// The following variables could as well be in config.php but they are easier to understand here | |
$totalFiles = 0; // A counter for the number of files scanned | |
$totalSpacedFiles = 0; // A counter for the number of files with spaced names (the ones we need to change) | |
$totalDisappearedFiles = 0; // A counter for the files that are in the database but not on disk | |
$printAll = false; // Whether we want to print all files paths that we found or just the ones with spaces | |
$counterLimit = 1000; // A maximum number of files with spaces that we want to change before stopping | |
$simulation = true; // Whether we want to simulate the conversion or really convert | |
// Connect to DB | |
try { | |
$dbh = new PDO('mysql:host='.$db['host'].';dbname='.$db['db'].'', $db['user'], $db['pass']); | |
} catch (PDOException $e) { | |
echo "Failed: ".$e->getMessage().PHP_EOL; | |
} | |
// Count items in g2_FileSystemEntity with a space in it | |
$statement = $dbh->prepare("SELECT COUNT(*) FROM g2_FileSystemEntity WHERE g_pathComponent LIKE '% %'"); | |
$result = $statement->execute(); | |
$count = $statement->fetch(); | |
echo $count[0]." files with spaces found before process".PHP_EOL; | |
// The main action happens in this (recursive) function call | |
$paths = getSubTree(0, $dir); | |
// The final report: | |
echo "Total files counted: $totalFiles".PHP_EOL; | |
echo "Total spaced files: $totalSpacedFiles".PHP_EOL; | |
echo "Total files that could not be found on disk: $totalDisappearedFiles".PHP_EOL; | |
echo "Total files (to be) renamed: ".($totalSpacedFiles - $totalDisappearedFiles).PHP_EOL; | |
// | |
function getSubTree($root = 0, $path = '') { | |
// These variables could be passed as parameters, but given it's a recursive function, using | |
// global saves us a lot of efforts (plus all these settings are defined in this very script) | |
global $dbh, $totalFiles, $totalSpacedFiles, $totalDisappearedFiles, $printAll, $counterLimit, $simulation; | |
// Initialize this subtree | |
$tree = []; | |
// The first iteration is a bit different because we are looking for | |
// the root directory, which can be done either by looking at the highest | |
// parent ID from the g2_ChildEntity table, or simply at the only item in | |
// g2_FileSystemEntity that has a NULL g_pathComponent | |
if ($root == 0) { | |
// Select the root directory and start recursive scanning from it | |
$statement = $dbh->prepare(' | |
SELECT g_id FROM g2_FileSystemEntity WHERE g_pathComponent IS NULL | |
'); | |
$result = $statement->execute(); | |
$row = $statement->fetch(); | |
$rootId = $row[0]; | |
$tree[0] = array(getSubTree($rootId, $path)); | |
} else { | |
// Get the IDs and paths of all elements directly under the current one | |
$statement = $dbh->prepare(' | |
SELECT f.g_id, f.g_pathComponent, i.g_canContainChildren | |
FROM g2_FileSystemEntity f INNER JOIN g2_Item i ON i.g_id = f.g_id | |
WHERE f.g_id IN (SELECT g_id FROM g2_ChildEntity WHERE g_parentId = '.$root.') | |
'); | |
$result = $statement->execute(); | |
// For each item at this level under this given root, | |
while ($row = $statement->fetch()) { | |
if ($counterLimit > 0 && $counter > $counterLimit) { | |
echo $totalSpacedFiles.'/'.$counterLimit.PHP_EOL; | |
break; | |
} | |
$totalFiles++; | |
// Build and show the current path | |
$newFullPath = $fullPath = $path.$row['g_pathComponent']; | |
$replace = false; | |
if (strpos($row['g_pathComponent'], ' ') !== false) { | |
echo $totalSpacedFiles.'/'.$counterLimit.PHP_EOL; | |
$totalSpacedFiles++; | |
$replace = true; | |
// Add a bit of additional clean-up: remove spaces before the extension dot... | |
if (strpos($row['g_pathComponent'], ' .') !== false) { | |
$newFullPath = str_replace(' .', '.', $newFullPath); | |
echo "W".PHP_EOL; | |
} | |
// ...and add an extension to those non-directories that don't have one | |
// Here we assume all no-extension files are JPG, but that might not be | |
// the case. Check your own directory for that. | |
if ($row['g_canContainChildren'] == 0 && strpos($row['g_pathComponent'], '.' === false)) { | |
$newFullPath .= '.jpg'; | |
echo "X".PHP_EOL; | |
} | |
$newFullPathBak = $newFullPath; // keep a backup in case we need a new renaming | |
$newFullPath = str_replace(' ', '_', $newFullPath); | |
echo $fullPath.PHP_EOL; | |
if (file_exists($fullPath)) { | |
$replaceChar = '_'; | |
// In the very unlikely event that a file with the same name with | |
// a '_' instead of a ' ' already exists, use a '-' sign to rename it | |
if (file_exists($newFullPath)) { | |
$replaceChar = '-'; | |
$newFullPath = str_replace(' ', $replaceChar, $newFullPathBak); | |
echo "Y".PHP_EOL; | |
} | |
echo " -> ".$newFullPath.PHP_EOL; | |
// If this is a directory, we want to pay special attention to | |
// it when reviewing the logs, so print a "Z" after the item log | |
if ($row['g_canContainChildren'] == 1) { | |
echo "Z".PHP_EOL; | |
} | |
// If this is not a simulation, convert the files in DB and on disk | |
if (!$simulation) { | |
rename($fullPath, $newFullPath); | |
$modPath = str_replace(' ', $replaceChar, $row['g_pathComponent']); | |
$gid = intval($row['g_id']); | |
$sql = "UPDATE g2_FileSystemEntity | |
SET g_pathComponent = '$modPath' | |
WHERE g_id = $gid"; | |
$statementUpdate = $dbh->prepare($sql); | |
try { | |
$resultUpdate = $statementUpdate->execute(); | |
} catch (PDOException $e) { | |
echo "Failed: ".$e->getMessage().PHP_EOL; | |
// If one query generates an error, we finish execution | |
// before we affect too many files | |
die(); | |
} | |
} | |
} else { | |
// We want to log files in DB but not on disk, for manual review | |
echo 'ZZZ -> file does not exist'.PHP_EOL; | |
$totalDisappearedFiles++; | |
} | |
} elseif ($printAll) { | |
echo $fullPath.PHP_EOL; | |
} | |
if ($row['g_canContainChildren'] == 1) { | |
// Sanity check for a final slash in the full path of directories | |
if (substr($newFullPath, -1, 1) != '/') { | |
$newFullPath .= '/'; | |
} | |
// This is a directory (a node in our tree), so call the recursive | |
// function to get the child tree | |
$tree[$row['g_id']] = array(getSubTree($row['g_id'], $newFullPath)); | |
} else { | |
// This is a file, so a leaf in our tree. Don't do anything more. | |
$tree[$row['g_id']] = $newFullPath; | |
} | |
} | |
} | |
return $tree; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment