Skip to content

Instantly share code, notes, and snippets.

@madmatt
Created February 17, 2020 22:32
Show Gist options
  • Save madmatt/4fc2c0a809fbafe5e480480fc9dcc947 to your computer and use it in GitHub Desktop.
Save madmatt/4fc2c0a809fbafe5e480480fc9dcc947 to your computer and use it in GitHub Desktop.
Task to find and print out all HTML WYSIWYG fields that contain SS3.0/3.1-era resampled images that the SS4 MigrateFileTask will fail to fix
<?php
namespace App\Tasks;
use Exception;
use PHPUnit\ExampleExtension\Comparator;
use SilverStripe\CMS\Model\SiteTree;
use SilverStripe\Control\HTTPRequest;
use SilverStripe\Core\ClassInfo;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\Dev\BuildTask;
use SilverStripe\ORM\Connect\Query;
use SilverStripe\ORM\DataObject;
use SilverStripe\ORM\DataObjectSchema;
use SilverStripe\ORM\DB;
use SilverStripe\ORM\FieldType\DBField;
use SilverStripe\ORM\Queries\SQLSelect;
use SilverStripe\SiteConfig\SiteConfig;
use SilverStripe\Versioned\Versioned;
/**
 * Build task that scans every HTMLText / HTMLVarchar column of every DataObject table for
 * <img> / <a> tags whose src / href value contains "_resampled", and prints each occurrence
 * together with a CMS edit link for the owning record.
 *
 * The task only reports; it never modifies any content.
 */
class FindAllHTMLFieldsTask extends BuildTask
{
    /**
     * HTML tag names that are inspected. Only the keys are read by this class; the values
     * look like the corresponding shortcode names (assumption, they are not used here).
     */
    const VALID_TAGS = [
        'img' => 'image',
        'a' => 'file_link'
    ];

    /**
     * Attribute names whose value is checked for a "_resampled" path.
     */
    const VALID_ATTRIBUTES = [
        'src',
        'href'
    ];

    /**
     * Maps a base table name to the class used to build a CMS edit link for its records.
     * A table that is missing here makes getObjectLink() throw.
     */
    const KNOWN_CLASSES = [
        'SiteConfig' => SiteConfig::class,
        'SiteTree' => SiteTree::class,
    ];

    private static $segment = 'FindAllHTMLFieldsTask';

    /** @var string Regex alternation of the tag names, e.g. "img|a" */
    private $validTagsPattern;

    /** @var string Regex alternation of the attribute names, e.g. "src|href" */
    private $validAttributesPattern;

    public function __construct()
    {
        parent::__construct();
        $this->validTagsPattern = implode('|', array_keys(static::VALID_TAGS));
        $this->validAttributesPattern = implode('|', static::VALID_ATTRIBUTES);
    }

    /**
     * Find all the HTML fields attached to all DataObjects to find any that contain broken resampled images,
     * then print a summary with the total number of offending tags.
     *
     * @param HTTPRequest $request
     * @return void
     */
    public function run($request)
    {
        $classes = $this->getFieldMap([
            'HTMLText',
            'HTMLVarchar'
        ]);
        // Looked up below with lower-cased table names
        $tableList = DB::table_list();
        $count = 0;

        foreach ($classes as $class => $tables) {
            /** @var DataObject $singleton */
            $singleton = singleton($class);
            $hasVersions =
                class_exists(Versioned::class) &&
                $singleton->hasExtension(Versioned::class) &&
                $singleton->hasStages();

            foreach ($tables as $table => $fields) {
                if (!isset($tableList[strtolower($table)])) {
                    // When running unit test some tables won't be created. We'll just skip those.
                    continue;
                }
                $hasLiveTable = $hasVersions && isset($tableList[strtolower($table . '_Live')]);

                foreach ($fields as $field) {
                    /** @var DBField $dbField */
                    $dbField = $singleton->dbObject($field);
                    if ($dbField &&
                        $dbField->hasMethod('getProcessShortcodes') &&
                        !$dbField->getProcessShortcodes()) {
                        // Field explicitly opts out of shortcode processing
                        continue;
                    }

                    // Draft / unversioned table
                    $count += $this->findShortcodesInField($table, $field);

                    // Live table
                    if ($hasLiveTable) {
                        $count += $this->findShortcodesInField($table . '_Live', $field);
                    }
                }
            }
        }

        printf("===================================================%s", PHP_EOL);
        printf(" SUMMARY: Found %d images that need to be replaced %s", $count, PHP_EOL);
        printf("===================================================%s", PHP_EOL);
    }

    /**
     * Select every row of the given table whose field contains one of the inspected tags
     * (as an HTML tag or as a shortcode) and check those rows for resampled references.
     *
     * @param string $table Table name (unquoted)
     * @param string $field Column name (unquoted)
     * @return int Returns the number of valid (needing to be fixed) tags found in this field
     */
    public function findShortcodesInField($table, $field)
    {
        $sqlSelect = SQLSelect::create(['"ID"', "\"{$field}\""], "\"{$table}\"");

        $whereAnys = [];
        foreach (array_keys(static::VALID_TAGS) as $tag) {
            $whereAnys[] = "\"{$table}\".\"{$field}\" LIKE '%<{$tag}%'";
            $whereAnys[] = "\"{$table}\".\"{$field}\" LIKE '%[{$tag}%'";
        }
        $sqlSelect->addWhereAny($whereAnys);

        return $this->checkFieldForRecords($sqlSelect->execute(), $table, $field);
    }

    /**
     * Inspect each row, print the rows that reference "_resampled" paths and count those references.
     *
     * Tags whose attribute value cannot be extracted (for example an unquoted value) are skipped
     * rather than aborting the scan of the remaining tags and rows.
     *
     * @param Query $records Rows providing at least "ID" and the $field column
     * @param string $table
     * @param string $field
     * @return int Returns the number of valid (needing to be fixed) tags found in all records of this table/field combo
     * @throws Exception If a match is found in a table that is not listed in self::KNOWN_CLASSES
     */
    public function checkFieldForRecords(Query $records, $table, $field)
    {
        // Built once: the pattern does not depend on the row being inspected.
        // - whitespace is required before the attribute name so "data-src" is not mistaken for "src"
        // - the closing quote must be the same character as the opening one (back-reference)
        $pattern = sprintf(
            '/(?:<|\[)(?<tagType>%s)\b[^>]*?\s(?<attribute>%s)\s*=\s*(?<quote>["\'])(?<src>.*?)\k<quote>/is',
            $this->validTagsPattern,
            $this->validAttributesPattern
        );

        $total = 0;
        foreach ($records as $row) {
            $resampled = [];
            foreach ($this->getTagsInContent((string) $row[$field]) as $tag) {
                if (!preg_match($pattern, $tag, $matches)) {
                    continue;
                }
                $src = $matches['src'];
                // Strict comparison: a match at offset 0 is still a match
                if (strpos($src, '_resampled') !== false) {
                    $resampled[] = $src;
                }
            }

            $found = count($resampled);
            if ($found > 0) {
                $link = $this->getObjectLink($table, $row['ID']);
                printf("Found %d tags in %s->%s (ID %d): %s%s", $found, $table, $field, $row['ID'], $link, PHP_EOL);
                foreach ($resampled as $src) {
                    printf(" * %s%s", $src, PHP_EOL);
                }
                echo PHP_EOL;
            }
            $total += $found;
        }

        return $total;
    }

    /**
     * Extract the full text of every inspected tag that carries one of the inspected attributes.
     *
     * @param string $content HTML content
     * @return string[] Complete opening tags, from "<" to the first following ">"
     */
    private function getTagsInContent($content)
    {
        // [^>]* (instead of .*?) lets a tag span several lines
        $regex = '/<(' . $this->validTagsPattern . ')\s[^>]*?(' . $this->validAttributesPattern . ')\s*=[^>]*>/i';
        if (!preg_match_all($regex, (string) $content, $matches)) {
            return [];
        }
        return $matches[0];
    }

    /**
     * Build a map of class => table => field names for every DataObject field of the given types.
     *
     * Based on TagsToShortcodeHelper (per the original author's note).
     *
     * @param string|string[] $fieldNames Field type names, e.g. "HTMLText"
     * @return array Nested map: [class name => [table name => [field name, ...]]]
     * @throws \ReflectionException
     */
    private function getFieldMap($fieldNames)
    {
        $mapping = [];

        // Normalise $fieldNames to a string array
        if (is_string($fieldNames)) {
            $fieldNames = [$fieldNames];
        }

        // Add the FQNS classnames of the DBFields
        $extraFieldNames = [];
        foreach ($fieldNames as $fieldName) {
            $dbField = Injector::inst()->get($fieldName);
            if ($dbField && $dbField instanceof DBField) {
                $extraFieldNames[] = get_class($dbField);
            }
        }
        $fieldNames = array_merge($fieldNames, $extraFieldNames);

        foreach (ClassInfo::subclassesFor(DataObject::class, true) as $class) {
            /** @var DataObjectSchema $schema */
            $schema = singleton($class)->getSchema();
            /** @var array $fields Field name => field spec for fields declared directly on $class */
            $fields = $schema->fieldSpecs($class, DataObjectSchema::DB_ONLY|DataObjectSchema::UNINHERITED);
            foreach ($fields as $field => $type) {
                // Drop trailing constructor arguments, e.g. "HTMLVarchar(255)" => "HTMLVarchar"
                $type = preg_replace('/\(.*\)$/', '', $type);
                if (!in_array($type, $fieldNames, true)) {
                    continue;
                }
                $table = $schema->tableForField($class, $field);
                if (!isset($mapping[$class][$table])) {
                    $mapping[$class][$table] = [];
                }
                if (!in_array($field, $mapping[$class][$table], true)) {
                    $mapping[$class][$table][] = $field;
                }
            }
        }

        return $mapping;
    }

    /**
     * @param string $table Table name; a trailing "_Live" suffix is ignored
     * @param int $id
     * @return string|null The link to edit this object in the CMS, or null when the record cannot be loaded
     * @throws Exception If the table is not listed in self::KNOWN_CLASSES
     */
    private function getObjectLink($table, $id)
    {
        $knownClasses = self::KNOWN_CLASSES;

        // Only strip a genuine suffix, never a "_Live" that appears elsewhere in the name
        $liveSuffix = '_Live';
        if (substr($table, -strlen($liveSuffix)) === $liveSuffix) {
            $table = substr($table, 0, -strlen($liveSuffix));
        }

        if (!isset($knownClasses[$table])) {
            throw new Exception(sprintf('Don\'t know how to map %s to a valid DataObject, update self::KNOWN_CLASSES', $table));
        }

        $class = $knownClasses[$table];
        switch ($class) {
            case SiteConfig::class:
                return '/admin/settings';
            case SiteTree::class:
                /** @var SiteTree|null $obj */
                $obj = $class::get()->byID($id);
                // byID() yields null when the record is not available; avoid calling a method on null
                return $obj ? $obj->CMSEditLink() : null;
        }

        return null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment