-
-
Save jraddaoui/5fa6e91c6f0e02b5759ac54f287aea0b to your computer and use it in GitHub Desktop.
AtoM script to partially fix and delete broken IOs (among other clean-ups)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
print("Have you created a backup of the database? [y/N]: "); | |
flush(); | |
ob_flush(); | |
$confirmation = trim(fgets(STDIN)); | |
if ($confirmation !== 'y') | |
{ | |
exit (0); | |
} | |
QubitSearch::disable(); | |
print("Counts before clean-up:\n"); | |
$sql = 'SELECT id | |
FROM information_object | |
WHERE parent_id IS NULL AND id<>1;'; | |
$noParentIos = QubitPdo::fetchAll( | |
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN] | |
); | |
print(sprintf(" - IOs without parent id: %d\n", count($noParentIos))); | |
$sql = 'SELECT io.id | |
FROM information_object io | |
LEFT JOIN object o ON io.id=o.id | |
WHERE o.id IS NULL;'; | |
$noObjectIos = QubitPdo::fetchAll( | |
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN] | |
); | |
print(sprintf(" - IOs without object row: %d\n", count($noObjectIos))); | |
$sql = 'SELECT io.id | |
FROM information_object io | |
LEFT JOIN object o ON io.id=o.id | |
WHERE o.id IS NULL OR (io.parent_id IS NULL AND io.id<>1);'; | |
$iosWithoutAny = QubitPdo::fetchAll( | |
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN] | |
); | |
print(sprintf(" - IOs without any of those: %d\n", count($iosWithoutAny))); | |
print(sprintf( | |
"Partially fix and delete %d IOs and their descendants? [y/N]: ", | |
count($iosWithoutAny) | |
)); | |
flush(); | |
ob_flush(); | |
$confirmation = trim(fgets(STDIN)); | |
if ($confirmation !== 'y') | |
{ | |
exit (0); | |
} | |
// Create CSV file before starting the clean-up in case it fails | |
$csvFile = fopen('deleted_ios.csv', 'w'); | |
fputcsv($csvFile, ['id', 'parent_id', 'lft', 'rgt', 'slug']); | |
print("Fixing object relation for other resources:\n"); | |
// List of classes with a related object row | |
$classes = [ | |
// The first four extend QubitActor too and need to go before it | |
'QubitRepository', | |
'QubitRightsHolder', | |
'QubitUser', | |
'QubitDonor', | |
'QubitActor', | |
'QubitAip', | |
'QubitJob', | |
'QubitDigitalObject', | |
'QubitEvent', | |
'QubitFunction', | |
// Skipping IOs as they will be fixed and deleted later | |
// 'QubitInformationObject', | |
'QubitObjectTermRelation', | |
'QubitPhysicalObject', | |
'QubitPremisObject', | |
'QubitRelation', | |
'QubitRights', | |
'QubitRightsHolder', | |
'QubitStaticPage', | |
'QubitTaxonomy', | |
'QubitTerm', | |
'QubitAccession', | |
'QubitDeaccession' | |
]; | |
foreach ($classes as $class) | |
{ | |
$fixed = 0; | |
$deleteCondition = "<> '$class'"; | |
// Rows from the actor table can have multiple classes | |
if ($class == 'QubitActor') | |
{ | |
$deleteCondition = "NOT IN ( | |
'QubitActor', | |
'QubitRepository', | |
'QubitRightsHolder', | |
'QubitUser', | |
'QubitDonor' | |
)"; | |
} | |
// Delete resources linked to a wrong object row | |
$sql = 'DELETE tb FROM ' . $class::TABLE_NAME . ' tb | |
LEFT JOIN object o ON tb.id=o.id | |
WHERE o.class_name ' . $deleteCondition . ';'; | |
$fixed = QubitPdo::modify($sql); | |
// Find resources without object row | |
$sql = 'SELECT tb.id | |
FROM ' . $class::TABLE_NAME . ' tb | |
LEFT JOIN object o ON tb.id=o.id | |
WHERE o.id IS NULL OR o.class_name IS NULL;'; | |
$noObjectIds = QubitPdo::fetchAll( | |
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN] | |
); | |
foreach ($noObjectIds as $id) | |
{ | |
// Insert object row with current dates | |
$sql = 'INSERT INTO object | |
(class_name, created_at, updated_at, id, serial_number) | |
VALUES | |
(:class, now(), now(), :id, 0);'; | |
QubitPdo::modify( | |
$sql, [':class' => $class, ':id' => $id] | |
); | |
$fixed++; | |
} | |
print(sprintf(" - %s: %d\n", $class, $fixed)); | |
} | |
// Set root term as parent for terms without one | |
$sql = 'UPDATE term SET parent_id=110 WHERE parent_id IS NULL AND id<>110;'; | |
$updated = QubitPdo::modify($sql); | |
print(sprintf("Updating terms without parent id: %d\n", $updated)); | |
print("Deleting IOs ...\n"); | |
$connection = Propel::getConnection(); | |
$deletedIos = []; | |
// Fix and delete problematic IOs and descendants within transactions | |
foreach ($iosWithoutAny as $ioId) | |
{ | |
$connection->beginTransaction(); | |
try | |
{ | |
deleteTree($ioId, $noObjectIos, $noParentIos, $deletedIos, $csvFile); | |
$connection->commit(); | |
} | |
catch (Exception $e) | |
{ | |
$connection->rollBack(); | |
throw $e; | |
} | |
} | |
fclose($csvFile); | |
print(sprintf("Done! %d IOs deleted.\n", count($deletedIos))); | |
print("Rebuilding nested set ...\n"); | |
$task = new propelBuildNestedSetTask($this->dispatcher, $this->formatter); | |
$task->setConfiguration($this->configuration); | |
$task->run(); | |
print( | |
"The ES index has not been updated! Run the search:populate task to do so.\n" | |
); | |
function deleteTree($id, $noObjectIos, $noParentIos, &$deletedIos, $csvFile) | |
{ | |
// Skip already deleted IOs | |
if (in_array($id, $deletedIos)) | |
{ | |
return; | |
} | |
// Get current IO data and write CSV row | |
$sql = 'SELECT io.id, io.parent_id, io.lft, io.rgt, slug | |
FROM information_object io | |
LEFT JOIN slug ON io.id=slug.object_id | |
WHERE io.id=:id;'; | |
$stmt = QubitPdo::prepareAndExecute($sql, [':id' => $id]); | |
fputcsv($csvFile, $stmt->fetch(PDO::FETCH_NUM)); | |
// Fix missing object row | |
if (in_array($id, $noObjectIos)) | |
{ | |
$sql = 'INSERT INTO object | |
(class_name, created_at, updated_at, id, serial_number) | |
VALUES | |
(:class, now(), now(), :id, 0);'; | |
QubitPdo::modify( | |
$sql, [':class' => 'QubitInformationObject', ':id' => $id] | |
); | |
} | |
// Set root IO as parent if missing | |
if (in_array($id, $noParentIos)) | |
{ | |
$sql = 'UPDATE information_object | |
SET parent_id=1 | |
WHERE id=:id;'; | |
QubitPdo::modify($sql, [':id' => $id]); | |
} | |
// Find children | |
$sql = 'SELECT id FROM information_object WHERE parent_id=:id;'; | |
$childrenIds = QubitPdo::fetchAll( | |
$sql, [':id' => $id], ['fetchMode' => PDO::FETCH_COLUMN] | |
); | |
// Delete children first | |
foreach ($childrenIds as $childId) | |
{ | |
deleteTree($childId, $noObjectIos, $noParentIos, $deletedIos, $csvFile); | |
} | |
// Delete IO without updating nested set | |
$io = QubitInformationObject::getById($id); | |
$io->disableNestedSetUpdating = true; | |
$io->delete(); | |
// Avoid high memory usage | |
Qubit::clearClassCaches(); | |
// Keep track of deleted IOs | |
$deletedIos[] = $id; | |
$deletedIosCount = count($deletedIos); | |
// Log progress each 100 deletions | |
if ($deletedIosCount % 100 == 0) | |
{ | |
print(sprintf("%d IOs deleted ...\n", $deletedIosCount)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment