Skip to content

Instantly share code, notes, and snippets.

@jraddaoui
Last active September 21, 2020 22:55
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jraddaoui/5fa6e91c6f0e02b5759ac54f287aea0b to your computer and use it in GitHub Desktop.
Save jraddaoui/5fa6e91c6f0e02b5759ac54f287aea0b to your computer and use it in GitHub Desktop.
AtoM script to partially fix and delete broken IOs (among other clean-ups)
<?php
print("Have you created a backup of the database? [y/N]: ");
flush();
ob_flush();
$confirmation = trim(fgets(STDIN));
if ($confirmation !== 'y')
{
exit (0);
}
QubitSearch::disable();
print("Counts before clean-up:\n");
$sql = 'SELECT id
FROM information_object
WHERE parent_id IS NULL AND id<>1;';
$noParentIos = QubitPdo::fetchAll(
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN]
);
print(sprintf(" - IOs without parent id: %d\n", count($noParentIos)));
$sql = 'SELECT io.id
FROM information_object io
LEFT JOIN object o ON io.id=o.id
WHERE o.id IS NULL;';
$noObjectIos = QubitPdo::fetchAll(
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN]
);
print(sprintf(" - IOs without object row: %d\n", count($noObjectIos)));
$sql = 'SELECT io.id
FROM information_object io
LEFT JOIN object o ON io.id=o.id
WHERE o.id IS NULL OR (io.parent_id IS NULL AND io.id<>1);';
$iosWithoutAny = QubitPdo::fetchAll(
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN]
);
print(sprintf(" - IOs without any of those: %d\n", count($iosWithoutAny)));
print(sprintf(
"Partially fix and delete %d IOs and their descendants? [y/N]: ",
count($iosWithoutAny)
));
flush();
ob_flush();
$confirmation = trim(fgets(STDIN));
if ($confirmation !== 'y')
{
exit (0);
}
// Create CSV file before starting the clean-up in case it fails
$csvFile = fopen('deleted_ios.csv', 'w');
fputcsv($csvFile, ['id', 'parent_id', 'lft', 'rgt', 'slug']);
print("Fixing object relation for other resources:\n");
// List of classes with a related object row
$classes = [
// The first four extend QubitActor too and need to go before it
'QubitRepository',
'QubitRightsHolder',
'QubitUser',
'QubitDonor',
'QubitActor',
'QubitAip',
'QubitJob',
'QubitDigitalObject',
'QubitEvent',
'QubitFunction',
// Skipping IOs as they will be fixed and deleted later
// 'QubitInformationObject',
'QubitObjectTermRelation',
'QubitPhysicalObject',
'QubitPremisObject',
'QubitRelation',
'QubitRights',
'QubitRightsHolder',
'QubitStaticPage',
'QubitTaxonomy',
'QubitTerm',
'QubitAccession',
'QubitDeaccession'
];
foreach ($classes as $class)
{
$fixed = 0;
$deleteCondition = "<> '$class'";
// Rows from the actor table can have multiple classes
if ($class == 'QubitActor')
{
$deleteCondition = "NOT IN (
'QubitActor',
'QubitRepository',
'QubitRightsHolder',
'QubitUser',
'QubitDonor'
)";
}
// Delete resources linked to a wrong object row
$sql = 'DELETE tb FROM ' . $class::TABLE_NAME . ' tb
LEFT JOIN object o ON tb.id=o.id
WHERE o.class_name ' . $deleteCondition . ';';
$fixed = QubitPdo::modify($sql);
// Find resources without object row
$sql = 'SELECT tb.id
FROM ' . $class::TABLE_NAME . ' tb
LEFT JOIN object o ON tb.id=o.id
WHERE o.id IS NULL OR o.class_name IS NULL;';
$noObjectIds = QubitPdo::fetchAll(
$sql, [], ['fetchMode' => PDO::FETCH_COLUMN]
);
foreach ($noObjectIds as $id)
{
// Insert object row with current dates
$sql = 'INSERT INTO object
(class_name, created_at, updated_at, id, serial_number)
VALUES
(:class, now(), now(), :id, 0);';
QubitPdo::modify(
$sql, [':class' => $class, ':id' => $id]
);
$fixed++;
}
print(sprintf(" - %s: %d\n", $class, $fixed));
}
// Set root term as parent for terms without one
$sql = 'UPDATE term SET parent_id=110 WHERE parent_id IS NULL AND id<>110;';
$updated = QubitPdo::modify($sql);
print(sprintf("Updating terms without parent id: %d\n", $updated));
print("Deleting IOs ...\n");
$connection = Propel::getConnection();
$deletedIos = [];
// Fix and delete problematic IOs and descendants within transactions
foreach ($iosWithoutAny as $ioId)
{
$connection->beginTransaction();
try
{
deleteTree($ioId, $noObjectIos, $noParentIos, $deletedIos, $csvFile);
$connection->commit();
}
catch (Exception $e)
{
$connection->rollBack();
throw $e;
}
}
fclose($csvFile);
print(sprintf("Done! %d IOs deleted.\n", count($deletedIos)));
print("Rebuilding nested set ...\n");
$task = new propelBuildNestedSetTask($this->dispatcher, $this->formatter);
$task->setConfiguration($this->configuration);
$task->run();
print(
"The ES index has not been updated! Run the search:populate task to do so.\n"
);
function deleteTree($id, $noObjectIos, $noParentIos, &$deletedIos, $csvFile)
{
// Skip already deleted IOs
if (in_array($id, $deletedIos))
{
return;
}
// Get current IO data and write CSV row
$sql = 'SELECT io.id, io.parent_id, io.lft, io.rgt, slug
FROM information_object io
LEFT JOIN slug ON io.id=slug.object_id
WHERE io.id=:id;';
$stmt = QubitPdo::prepareAndExecute($sql, [':id' => $id]);
fputcsv($csvFile, $stmt->fetch(PDO::FETCH_NUM));
// Fix missing object row
if (in_array($id, $noObjectIos))
{
$sql = 'INSERT INTO object
(class_name, created_at, updated_at, id, serial_number)
VALUES
(:class, now(), now(), :id, 0);';
QubitPdo::modify(
$sql, [':class' => 'QubitInformationObject', ':id' => $id]
);
}
// Set root IO as parent if missing
if (in_array($id, $noParentIos))
{
$sql = 'UPDATE information_object
SET parent_id=1
WHERE id=:id;';
QubitPdo::modify($sql, [':id' => $id]);
}
// Find children
$sql = 'SELECT id FROM information_object WHERE parent_id=:id;';
$childrenIds = QubitPdo::fetchAll(
$sql, [':id' => $id], ['fetchMode' => PDO::FETCH_COLUMN]
);
// Delete children first
foreach ($childrenIds as $childId)
{
deleteTree($childId, $noObjectIos, $noParentIos, $deletedIos, $csvFile);
}
// Delete IO without updating nested set
$io = QubitInformationObject::getById($id);
$io->disableNestedSetUpdating = true;
$io->delete();
// Avoid high memory usage
Qubit::clearClassCaches();
// Keep track of deleted IOs
$deletedIos[] = $id;
$deletedIosCount = count($deletedIos);
// Log progress each 100 deletions
if ($deletedIosCount % 100 == 0)
{
print(sprintf("%d IOs deleted ...\n", $deletedIosCount));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment