Skip to content

Instantly share code, notes, and snippets.

@Tuurlijk
Created January 10, 2024 13:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tuurlijk/e9fd00934ed9f38edbb4b0b7a97b8253 to your computer and use it in GitHub Desktop.
Save Tuurlijk/e9fd00934ed9f38edbb4b0b7a97b8253 to your computer and use it in GitHub Desktop.
An optimized version of the orphan records command
<?php
declare(strict_types=1);
namespace TYPO3\CMS\Lowlevel\Command;
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use TYPO3\CMS\Backend\Utility\BackendUtility;
use TYPO3\CMS\Core\Core\Bootstrap;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
use TYPO3\CMS\Core\DataHandling\DataHandler;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
*
*/
class OrphanRecordsCommand extends Command
{
/**
* Configure the command by defining the name, options and arguments
*/
public function configure()
{
$this
->setDescription('Find and delete records that have lost their connection with the page tree')
->setHelp('Assumption: All actively used records on the website from TCA configured tables are located in the page tree exclusively.
All records managed by TYPO3 via the TCA array configuration has to belong to a page in the page tree, either directly or indirectly as a version of another record.
VERY TIME, CPU and MEMORY intensive operation since the full page tree is looked up!
Automatic Repair of Errors:
- Silently deleting the orphaned records. In theory they should not be used anywhere in the system, but there could be references. See below for more details on this matter.
Manual repair suggestions:
- Possibly re-connect orphaned records to page tree by setting their "pid" field to a valid page id. A lookup in the sys_refindex table can reveal if there are references to a orphaned record. If there are such references (from records that are not themselves orphans) you might consider to re-connect the record to the page tree, otherwise it should be safe to delete it.
If you want to get more detailed information, use the --verbose option.')
->addOption(
'dry-run',
null,
InputOption::VALUE_NONE,
'If this option is set, the records will not actually be deleted, but just the output which records would be deleted are shown'
);
}
/**
* Executes the command to find records not attached to the pagetree
* and permanently delete these records
*
* @param InputInterface $input
* @param OutputInterface $output
*/
protected function execute(InputInterface $input, OutputInterface $output)
{
// Make sure the _cli_ user is loaded
Bootstrap::initializeBackendAuthentication();
$io = new SymfonyStyle($input, $output);
$io->title($this->getDescription());
if ($io->isVerbose()) {
$io->section('Searching the database now for orphaned records');
}
// type unsafe comparison and explicit boolean setting on purpose
$dryRun = $input->hasOption('dry-run') && $input->getOption('dry-run') != false ? true : false;
$orphanPages = $this->findOrphanPages($io);
if ($io->isVerbose()) {
$io->writeln(sprintf("Found <info>%s</info> orphaned pages", number_format(count($orphanPages))));
}
$io->section("Inspecting tables for orphaned records");
$orphanedRecords = $this->findRecordsOnOrphanedPages($orphanPages);
foreach ($orphanedRecords as $table => $ids) {
if ($io->isVerbose()) {
$io->writeln(sprintf("Table <options=bold>%s</> has <info>%d</info> orphaned records", $table, number_format(count($ids))));
}
}
$orphanedRecords['pages'] = $orphanPages;
$io->section("Deleting orphaned records from tables");
$this->deleteRecords($orphanedRecords, $dryRun, $io);
}
/**
* Find parents for pages
*
* Each page in pages is an array containing 'uid' as u, 'pid' as p and 'deleted' as d.
* - If a record is deleted, we add the record to orphans
* - If $parents contains the pid, the uid can become a parent too, so we add it to $parents
*
* @param array $pages
* @param array $parents
* @return array
*/
protected function findParents(array $pages, array &$parents): array
{
$orphans = [];
foreach ($pages as $page) {
if ($page['d']) {
$orphans[] = $page;
continue;
}
if (isset($parents[$page['p']])) {
$parents[$page['u']] = true;
} else {
$orphans[] = $page;
}
}
return $orphans;
}
/**
* Get uid, pid and deleted values for all pages, including deleted
* @return mixed[]
*/
protected function getAllPages()
{
/** @var QueryBuilder $queryBuilder */
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
$queryBuilder->getRestrictions()->removeAll();
$statement = $queryBuilder
->selectLiteral('uid as u', 'pid as p', 'deleted as d')
->from('pages')
->execute();
return $statement->fetchAll();
}
/**
* Finds all records that have a page ID without a connection to the root page.
*
* - Find all page id's and their parent page id's
* - Iterate over the records and exhaustively try to attach all records to a parent record
* - Records that cannot be attached are orphan pages
*
* @param SymfonyStyle $io
* @return array
*/
private function findOrphanPages(SymfonyStyle $io): array
{
$pages = $this->getAllPages();
$parents = [0 => true];
if ($io->isVerbose()) {
$io->writeln(sprintf("Found <info>%s</info> pages", number_format(count($pages))));
}
$orphans = $this->findParents($pages, $parents);
if ($io->isVerbose()) {
$io->writeln(sprintf("Found <info>%s</info> potential orphans", number_format(count($orphans))));
}
$lastOrphanCount = count($orphans);
$orphanCount = count($orphans) - 1;
while ($orphanCount < $lastOrphanCount) {
$lastOrphanCount = count($orphans);
$orphans = $this->findParents($orphans, $parents);
$orphanCount = count($orphans);
if ($orphanCount < $lastOrphanCount) {
if ($io->isVerbose()) {
$io->writeln(sprintf("Reduced to <info>%s</info> potential orphans", number_format(count($orphans))));
}
}
}
$orphan_uids = [];
foreach ($orphans as $orphan) {
if ($orphan['u'] === 0) {
continue;
}
// Add any versions of pages
$versions = BackendUtility::selectVersionsOfRecord('pages', $orphan['u'], 'uid,t3ver_oid,t3ver_wsid,t3ver_count', null, true);
if (!is_array($versions)) {
continue;
}
foreach ($versions as $verRec) {
if (!$verRec['_CURRENT_VERSION']) {
$orphan_uids[] = (int)$verRec['uid'];
}
}
$orphan_uids[] = $orphan['u'];
}
return $orphan_uids;
}
private function findRecordsOnOrphanedPages(array $orphanPages): array
{
$allRecords = [];
foreach (array_keys($GLOBALS['TCA']) as $table) {
if ($table === 'pages') {
continue;
}
/** @var QueryBuilder $queryBuilder */
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($table);
$queryBuilder->getRestrictions()->removeAll();
$result = $queryBuilder
->select('uid')
->from($table)
->where($queryBuilder->expr()->in('pid', $queryBuilder->createNamedParameter($orphanPages, \TYPO3\CMS\Core\Database\Connection::PARAM_INT_ARRAY)))
->execute();
while ($row = $result->fetch()) {
$allRecords[$table][$row['uid']] = $row['uid'];
// Add any versions of those records:
$versions = BackendUtility::selectVersionsOfRecord($table, $row['uid'], 'uid,t3ver_wsid,t3ver_count', null, true);
if (is_array($versions)) {
foreach ($versions as $verRec) {
if (!$verRec['_CURRENT_VERSION']) {
$allRecords[$table][$verRec['uid']] = $verRec['uid'];
}
}
}
}
}
return $allRecords;
}
/**
* Deletes records via DataHandler
*
* @param array $orphanedRecords two level array with tables and uids
* @param bool $dryRun check if the records should NOT be deleted (use --dry-run to avoid)
* @param SymfonyStyle $io
*/
protected function deleteRecords(array $orphanedRecords, bool $dryRun, SymfonyStyle $io)
{
// Putting "pages" table in the bottom
if (isset($orphanedRecords['pages'])) {
$_pages = $orphanedRecords['pages'];
unset($orphanedRecords['pages']);
// To delete sub pages first assuming they are accumulated from top of page tree.
$orphanedRecords['pages'] = array_reverse($_pages);
}
// set up the data handler instance
$dataHandler = GeneralUtility::makeInstance(DataHandler::class);
$dataHandler->start([], []);
// Loop through all tables and their records
foreach ($orphanedRecords as $table => $list) {
if ($io->isVerbose()) {
$io->writeln(sprintf("Flushing <info>%s</info> orphaned records from table <options=bold>%s</>", number_format(count($list)), $table));
}
foreach ($list as $uid) {
if ($io->isVeryVerbose()) {
$io->writeln('Flushing record "' . $table . ':' . $uid . '"');
}
if (!$dryRun) {
// Notice, we are deleting pages with no regard to subpages/subrecords - we do this since they
// should also be included in the set of deleted pages of course (no un-deleted record can exist
// under a deleted page...)
$dataHandler->deleteRecord($table, $uid, true, true);
// Return errors if any:
if (!empty($dataHandler->errorLog)) {
$errorMessage = array_merge(['DataHandler reported an error'], $dataHandler->errorLog);
$io->error($errorMessage);
} elseif (!$io->isQuiet()) {
$io->writeln('Permanently deleted orphaned record "' . $table . ':' . $uid . '".');
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment