Created
February 17, 2020 22:32
-
-
Save madmatt/4fc2c0a809fbafe5e480480fc9dcc947 to your computer and use it in GitHub Desktop.
Task to find and print out all HTML WYSIWYG fields that contain SS3.0/3.1-era resampled images that the SS4 MigrateFileTask will fail to fix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Tasks; | |
use Exception; | |
use PHPUnit\ExampleExtension\Comparator; | |
use SilverStripe\CMS\Model\SiteTree; | |
use SilverStripe\Control\HTTPRequest; | |
use SilverStripe\Core\ClassInfo; | |
use SilverStripe\Core\Injector\Injector; | |
use SilverStripe\Dev\BuildTask; | |
use SilverStripe\ORM\Connect\Query; | |
use SilverStripe\ORM\DataObject; | |
use SilverStripe\ORM\DataObjectSchema; | |
use SilverStripe\ORM\DB; | |
use SilverStripe\ORM\FieldType\DBField; | |
use SilverStripe\ORM\Queries\SQLSelect; | |
use SilverStripe\SiteConfig\SiteConfig; | |
use SilverStripe\Versioned\Versioned; | |
/**
 * Build task that reports legacy "_resampled" image/file references in HTML fields.
 *
 * It walks every DataObject subclass, collects the HTMLText / HTMLVarchar columns
 * declared directly on each class, scans the matching database tables (and their
 * "_Live" counterparts for versioned classes) and prints every record whose HTML
 * contains an <img> or <a> tag whose src/href contains "_resampled".
 *
 * Nothing is modified: the task only prints a report plus a final summary count.
 */
class FindAllHTMLFieldsTask extends BuildTask
{
    /**
     * Tags that are inspected. Only the keys are used by this class; they are
     * inserted verbatim into regular expressions and SQL LIKE clauses.
     */
    const VALID_TAGS = [
        'img' => 'image',
        'a' => 'file_link'
    ];

    /**
     * Attributes whose value is inspected. Inserted verbatim into regular expressions.
     */
    const VALID_ATTRIBUTES = [
        'src',
        'href'
    ];

    /**
     * Maps a table name to the class used to build a CMS edit link for a hit.
     * A hit in any table that is not listed here makes getObjectLink() throw.
     */
    const KNOWN_CLASSES = [
        'SiteConfig' => SiteConfig::class,
        'SiteTree' => SiteTree::class,
    ];

    private static $segment = 'FindAllHTMLFieldsTask';

    /** @var string Regex alternation built from the keys of VALID_TAGS, e.g. "img|a" */
    private $validTagsPattern;

    /** @var string Regex alternation built from VALID_ATTRIBUTES, e.g. "src|href" */
    private $validAttributesPattern;

    /**
     * Pre-computes the regex alternations used by the scanning methods.
     */
    public function __construct()
    {
        parent::__construct();

        $this->validTagsPattern = implode('|', array_keys(static::VALID_TAGS));
        $this->validAttributesPattern = implode('|', static::VALID_ATTRIBUTES);
    }

    /**
     * Find all the HTML fields attached to all DataObjects to find any that contain broken resampled images
     *
     * Prints one entry per affected record and a summary with the total number of hits.
     *
     * @param HTTPRequest $request
     * @return void
     */
    public function run($request)
    {
        $classes = $this->getFieldMap([
            'HTMLText',
            'HTMLVarchar'
        ]);

        // Keys of the table list are lower-cased table names.
        $tableList = DB::table_list();
        $count = 0;

        foreach ($classes as $class => $tables) {
            /** @var DataObject $singleton */
            $singleton = singleton($class);
            $hasVersions =
                class_exists(Versioned::class) &&
                $singleton->hasExtension(Versioned::class) &&
                $singleton->hasStages();

            foreach ($tables as $table => $fields) {
                if (!isset($tableList[strtolower($table)])) {
                    // When running unit test some tables won't be created. We'll just skip those.
                    continue;
                }

                // Only scan the live table when it really exists, for the same
                // reason the base table is checked above.
                $liveTable = $table . '_Live';
                $scanLive = $hasVersions && isset($tableList[strtolower($liveTable)]);

                foreach ($fields as $field) {
                    /** @var DBField $dbField */
                    $dbField = $singleton->dbObject($field);

                    // Fields that explicitly opt out of shortcode processing are skipped.
                    if ($dbField &&
                        $dbField->hasMethod('getProcessShortcodes') &&
                        !$dbField->getProcessShortcodes()) {
                        continue;
                    }

                    // Scan the base (draft) table
                    $count += $this->findShortcodesInField($table, $field);

                    // Scan the live table
                    if ($scanLive) {
                        $count += $this->findShortcodesInField($liveTable, $field);
                    }
                }
            }
        }

        printf("===================================================%s", PHP_EOL);
        printf(" SUMMARY: Found %d images that need to be replaced %s", $count, PHP_EOL);
        printf("===================================================%s", PHP_EOL);
    }

    /**
     * Selects every row of $table whose $field looks like it contains one of the
     * VALID_TAGS (as an HTML tag or a shortcode) and hands the rows to
     * checkFieldForRecords() for the detailed check.
     *
     * The LIKE clauses are only a coarse pre-filter (e.g. '%<a%' also matches
     * "<abbr"); the precise matching is done with regular expressions afterwards.
     *
     * @param string $table
     * @param string $field
     * @return int Returns the number of valid (needing to be fixed) tags found in this field
     */
    public function findShortcodesInField($table, $field)
    {
        $sqlSelect = SQLSelect::create(['"ID"', "\"$field\""], "\"$table\"");

        $whereAnys = [];
        foreach (array_keys(static::VALID_TAGS) as $tag) {
            $whereAnys[] = "\"$table\".\"$field\" LIKE '%<$tag%'";
            $whereAnys[] = "\"$table\".\"$field\" LIKE '%[$tag%'";
        }
        $sqlSelect->addWhereAny($whereAnys);

        return $this->checkFieldForRecords($sqlSelect->execute(), $table, $field);
    }

    /**
     * Inspects every row for tags whose src/href value contains "_resampled" and
     * prints a report entry (with a CMS link) for each row that has at least one.
     *
     * Tags whose attribute value cannot be extracted (e.g. unquoted values) are
     * skipped; they do not abort the scan of the remaining tags or rows.
     *
     * @param Query $records Rows containing at least the "ID" column and $field
     * @param string $table
     * @param string $field
     * @return int Returns the number of valid (needing to be fixed) tags found in all records of this table/field combo
     * @throws Exception If a hit is found in a table that is not listed in KNOWN_CLASSES
     */
    public function checkFieldForRecords(Query $records, $table, $field)
    {
        // Built once: it does not depend on the row being processed.
        //  - lazy ".*?" + lookbehind: take the first real src/href attribute, not
        //    something that merely ends in it such as "data-href"
        //  - back-referenced quote: the value ends at the same quote type that opened it
        //  - "s" modifier: tags may span several lines
        $pattern = sprintf(
            '/(?:<|\[)(?<tagType>%s)\b.*?(?<![\w-])(?<attribute>%s)\s*=\s*(?<quote>["\'])(?<src>.*?)\k<quote>/is',
            $this->validTagsPattern,
            $this->validAttributesPattern
        );

        $total = 0;

        foreach ($records as $row) {
            $validTags = [];

            foreach ($this->getTagsInContent($row[$field]) as $tag) {
                if (!preg_match($pattern, $tag, $matches)) {
                    // Unparseable tag: ignore it and keep going.
                    continue;
                }

                // Search the value to find any reference to _resampled and only keep those that match.
                // strpos() returns 0 for a match at the very start, hence the strict comparison.
                $src = $matches['src'];
                if (strpos($src, '_resampled') !== false) {
                    $validTags[] = $src;
                }
            }

            $found = count($validTags);

            if ($found > 0) {
                $link = $this->getObjectLink($table, $row['ID']);
                printf("Found %d tags in %s->%s (ID %d): %s%s", $found, $table, $field, $row['ID'], $link, PHP_EOL);

                foreach ($validTags as $src) {
                    printf(" * %s%s", $src, PHP_EOL);
                }

                echo PHP_EOL;
            }

            $total += $found;
        }

        return $total;
    }

    /**
     * Extracts the raw HTML of every VALID_TAGS tag in $content that carries one
     * of the VALID_ATTRIBUTES.
     *
     * @param string|null $content
     * @return string[] The complete tag strings, in document order
     */
    private function getTagsInContent($content)
    {
        $regex = '/<('.$this->validTagsPattern.')\s[^>]*?('.$this->validAttributesPattern.')\s*=.*?>/i';

        // In the default pattern order, $matches[0] is the list of full matches.
        if (!preg_match_all($regex, (string) $content, $matches)) {
            return [];
        }

        return $matches[0];
    }

    /**
     * Builds a map of class => table => field names for every DataObject subclass
     * that directly declares a database field of one of the given types.
     *
     * Adapted from TagsToShortcodeHelper.
     *
     * @param string|string[] $fieldNames Field type names, e.g. 'HTMLText'
     * @return array Nested array: [className => [tableName => [fieldName, ...]]]
     * @throws \ReflectionException
     */
    private function getFieldMap($fieldNames)
    {
        $mapping = [];

        // Normalise $fieldNames to a string array
        if (is_string($fieldNames)) {
            $fieldNames = [$fieldNames];
        }

        // Add the FQNS classnames of the DBFields
        $extraFieldNames = [];
        foreach ($fieldNames as $fieldName) {
            $dbField = Injector::inst()->get($fieldName);
            if ($dbField && $dbField instanceof DBField) {
                $extraFieldNames[] = get_class($dbField);
            }
        }
        $fieldNames = array_merge($fieldNames, $extraFieldNames);

        foreach (ClassInfo::subclassesFor(DataObject::class, true) as $class) {
            /** @var DataObjectSchema $schema */
            $schema = singleton($class)->getSchema();

            // Only the fields declared on this very class, not inherited ones.
            $fields = $schema->fieldSpecs($class, DataObjectSchema::DB_ONLY|DataObjectSchema::UNINHERITED);

            foreach ($fields as $field => $type) {
                // Strip constructor arguments, e.g. "HTMLVarchar(255)" => "HTMLVarchar"
                $type = preg_replace('/\(.*\)$/', '', $type);
                if (!in_array($type, $fieldNames, true)) {
                    continue;
                }

                $table = $schema->tableForField($class, $field);

                if (!isset($mapping[$class])) {
                    $mapping[$class] = [];
                }
                if (!isset($mapping[$class][$table])) {
                    $mapping[$class][$table] = [];
                }
                if (!in_array($field, $mapping[$class][$table], true)) {
                    $mapping[$class][$table][] = $field;
                }
            }
        }

        return $mapping;
    }

    /**
     * Returns a CMS link for the record a hit was found in.
     *
     * A trailing "_Live" is stripped from the table name before it is looked up
     * in KNOWN_CLASSES. Pages are loaded from the stage that matches the table
     * the hit came from, so a draft-only or live-only page can still be resolved.
     *
     * @param string $table
     * @param int $id
     * @return string|null The link to edit this object in the CMS, or a short
     *                     notice when the page record could not be loaded
     * @throws Exception If the table is not listed in KNOWN_CLASSES
     */
    private function getObjectLink($table, $id)
    {
        $knownClasses = self::KNOWN_CLASSES;

        // Treat "_Live" strictly as a suffix; a plain strpos() truthiness test
        // would also react to "_Live" in the middle of a table name.
        $liveSuffix = '_Live';
        $isLive = substr($table, -strlen($liveSuffix)) === $liveSuffix;
        if ($isLive) {
            $table = substr($table, 0, -strlen($liveSuffix));
        }

        if (!array_key_exists($table, $knownClasses)) {
            throw new Exception(sprintf('Don\'t know how to map %s to a valid DataObject, update self::KNOWN_CLASSES', $table));
        }

        switch ($knownClasses[$table]) {
            case SiteConfig::class:
                return '/admin/settings';

            case SiteTree::class:
                $stage = $isLive ? Versioned::LIVE : Versioned::DRAFT;

                /** @var SiteTree|null $page */
                $page = Versioned::get_by_stage(SiteTree::class, $stage)->byID($id);
                if (!$page) {
                    // Avoid a fatal "call to a member function on null".
                    return sprintf('(unable to load SiteTree #%d from the %s stage)', $id, $stage);
                }

                return $page->CMSEditLink();
        }

        return null;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment