Skip to content

Instantly share code, notes, and snippets.

@ywarnier
Created May 23, 2017 23:36
Show Gist options
  • Save ywarnier/4ca99d5a8e574b15419ae15ad50bdd31 to your computer and use it in GitHub Desktop.
Save ywarnier/4ca99d5a8e574b15419ae15ad50bdd31 to your computer and use it in GitHub Desktop.
Script to convert files in a Gallery2 database and directories to avoid spaces, so they can be imported in Piwigo
<?php
/**
* This script converts files in a Gallery2 database and directories structure
* to avoid spaces, so that they can be imported in Piwigo (which doesn't tolerate spaces)
* It gives a substantial amount of output, so we recommend executing with
* php g2-convert.php > output.txt
* to be able to analyse it afterwards.
* This script has been used successfully on a database of 249K pictures
* @author @ywarnier @BeezNest
* @date 2017
* @license GNU/LGPL
*/
// config.php should contain:
// $db = ['host' => 'localhost', 'db' => 'dbname', 'user' => 'user', 'pass' => 'password'];
// $dir = 'the-full-path-to-the-g2data/albums/';
require_once __DIR__.'/config.php';
// The following variables could as well be in config.php but they are easier to understand here
$totalFiles = 0; // A counter for the number of files scanned
$totalSpacedFiles = 0; // A counter for the number of files with spaced names (the ones we need to change)
$totalDisappearedFiles = 0; // A counter for the files that are in the database but not on disk
$printAll = false; // Whether we want to print all files paths that we found or just the ones with spaces
$counterLimit = 1000; // A maximum number of files with spaces that we want to change before stopping
$simulation = true; // Whether we want to simulate the conversion or really convert
// Connect to DB
try {
$dbh = new PDO('mysql:host='.$db['host'].';dbname='.$db['db'].'', $db['user'], $db['pass']);
} catch (PDOException $e) {
echo "Failed: ".$e->getMessage().PHP_EOL;
}
// Count items in g2_FileSystemEntity with a space in it
$statement = $dbh->prepare("SELECT COUNT(*) FROM g2_FileSystemEntity WHERE g_pathComponent LIKE '% %'");
$result = $statement->execute();
$count = $statement->fetch();
echo $count[0]." files with spaces found before process".PHP_EOL;
// The main action happens in this (recursive) function call
$paths = getSubTree(0, $dir);
// The final report:
echo "Total files counted: $totalFiles".PHP_EOL;
echo "Total spaced files: $totalSpacedFiles".PHP_EOL;
echo "Total files that could not be found on disk: $totalDisappearedFiles".PHP_EOL;
echo "Total files (to be) renamed: ".($totalSpacedFiles - $totalDisappearedFiles).PHP_EOL;
//
function getSubTree($root = 0, $path = '') {
// These variables could be passed as parameters, but given it's a recursive function, using
// global saves us a lot of efforts (plus all these settings are defined in this very script)
global $dbh, $totalFiles, $totalSpacedFiles, $totalDisappearedFiles, $printAll, $counterLimit, $simulation;
// Initialize this subtree
$tree = [];
// The first iteration is a bit different because we are looking for
// the root directory, which can be done either by looking at the highest
// parent ID from the g2_ChildEntity table, or simply at the only item in
// g2_FileSystemEntity that has a NULL g_pathComponent
if ($root == 0) {
// Select the root directory and start recursive scanning from it
$statement = $dbh->prepare('
SELECT g_id FROM g2_FileSystemEntity WHERE g_pathComponent IS NULL
');
$result = $statement->execute();
$row = $statement->fetch();
$rootId = $row[0];
$tree[0] = array(getSubTree($rootId, $path));
} else {
// Get the IDs and paths of all elements directly under the current one
$statement = $dbh->prepare('
SELECT f.g_id, f.g_pathComponent, i.g_canContainChildren
FROM g2_FileSystemEntity f INNER JOIN g2_Item i ON i.g_id = f.g_id
WHERE f.g_id IN (SELECT g_id FROM g2_ChildEntity WHERE g_parentId = '.$root.')
');
$result = $statement->execute();
// For each item at this level under this given root,
while ($row = $statement->fetch()) {
if ($counterLimit > 0 && $counter > $counterLimit) {
echo $totalSpacedFiles.'/'.$counterLimit.PHP_EOL;
break;
}
$totalFiles++;
// Build and show the current path
$newFullPath = $fullPath = $path.$row['g_pathComponent'];
$replace = false;
if (strpos($row['g_pathComponent'], ' ') !== false) {
echo $totalSpacedFiles.'/'.$counterLimit.PHP_EOL;
$totalSpacedFiles++;
$replace = true;
// Add a bit of additional clean-up: remove spaces before the extension dot...
if (strpos($row['g_pathComponent'], ' .') !== false) {
$newFullPath = str_replace(' .', '.', $newFullPath);
echo "W".PHP_EOL;
}
// ...and add an extension to those non-directories that don't have one
// Here we assume all no-extension files are JPG, but that might not be
// the case. Check your own directory for that.
if ($row['g_canContainChildren'] == 0 && strpos($row['g_pathComponent'], '.' === false)) {
$newFullPath .= '.jpg';
echo "X".PHP_EOL;
}
$newFullPathBak = $newFullPath; // keep a backup in case we need a new renaming
$newFullPath = str_replace(' ', '_', $newFullPath);
echo $fullPath.PHP_EOL;
if (file_exists($fullPath)) {
$replaceChar = '_';
// In the very unlikely event that a file with the same name with
// a '_' instead of a ' ' already exists, use a '-' sign to rename it
if (file_exists($newFullPath)) {
$replaceChar = '-';
$newFullPath = str_replace(' ', $replaceChar, $newFullPathBak);
echo "Y".PHP_EOL;
}
echo " -> ".$newFullPath.PHP_EOL;
// If this is a directory, we want to pay special attention to
// it when reviewing the logs, so print a "Z" after the item log
if ($row['g_canContainChildren'] == 1) {
echo "Z".PHP_EOL;
}
// If this is not a simulation, convert the files in DB and on disk
if (!$simulation) {
rename($fullPath, $newFullPath);
$modPath = str_replace(' ', $replaceChar, $row['g_pathComponent']);
$gid = intval($row['g_id']);
$sql = "UPDATE g2_FileSystemEntity
SET g_pathComponent = '$modPath'
WHERE g_id = $gid";
$statementUpdate = $dbh->prepare($sql);
try {
$resultUpdate = $statementUpdate->execute();
} catch (PDOException $e) {
echo "Failed: ".$e->getMessage().PHP_EOL;
// If one query generates an error, we finish execution
// before we affect too many files
die();
}
}
} else {
// We want to log files in DB but not on disk, for manual review
echo 'ZZZ -> file does not exist'.PHP_EOL;
$totalDisappearedFiles++;
}
} elseif ($printAll) {
echo $fullPath.PHP_EOL;
}
if ($row['g_canContainChildren'] == 1) {
// Sanity check for a final slash in the full path of directories
if (substr($newFullPath, -1, 1) != '/') {
$newFullPath .= '/';
}
// This is a directory (a node in our tree), so call the recursive
// function to get the child tree
$tree[$row['g_id']] = array(getSubTree($row['g_id'], $newFullPath));
} else {
// This is a file, so a leaf in our tree. Don't do anything more.
$tree[$row['g_id']] = $newFullPath;
}
}
}
return $tree;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment