Skip to content

Instantly share code, notes, and snippets.

@philbirnie
Last active October 8, 2020 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philbirnie/71e709ccc53cd9bbeed2fb0f2022ffb8 to your computer and use it in GitHub Desktop.
Save philbirnie/71e709ccc53cd9bbeed2fb0f2022ffb8 to your computer and use it in GitHub Desktop.
WordPress Image Deduper (Beta)
<?php
// Load the WordPress configuration; the DB_* constants and $table_prefix used below come from it.
require_once 'wp-config.php';

echo 'Starting Image De-duper' . "\n";

global $table_prefix;

/**
 * Configurable Settings
 */
$batch_size       = 100;
$starting_post_id = 10172;

// Connection parameters, taken from the constants defined by wp-config.php.
// NOTE(review): DB_HOST is placed into the DSN verbatim; if it ever carries a
// port or socket suffix it may need to be split first — confirm.
[ $host, $user, $password, $db, $charset ] = [ DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, 'utf8mb4' ];

$dsn     = sprintf( 'mysql:host=%s;dbname=%s;charset=%s', $host, $db, $charset );
$options = [
    // Raise exceptions on database errors.
    PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
    // Rows come back as associative arrays.
    PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    // Use native (server-side) prepared statements.
    PDO::ATTR_EMULATE_PREPARES   => false,
];

try {
    $pdo = new PDO( $dsn, $user, $password, $options );
} catch ( \PDOException $connection_error ) {
    // Re-throw a fresh exception that carries only the message and code of the original.
    throw new \PDOException( $connection_error->getMessage(), (int) $connection_error->getCode() );
}
print 'Getting Attachments' . "\n";

/**
 * Get the attachment post IDs and image sources.
 *
 * Reads every `_wp_attached_file` meta row whose post ID is greater than
 * $starting_post_id, ordered by post ID, and builds $image_sources as a
 * map of post ID => attached file path.
 */
$sql = "SELECT meta_value, post_id
    FROM {$table_prefix}postmeta
    WHERE meta_key = '_wp_attached_file'
    AND post_id > ?
    ORDER BY post_id ASC";

$stmt = $pdo->prepare( $sql );
$stmt->execute( [ $starting_post_id ] );

// Build the map row by row instead of holding the full result set and a copy of it.
$image_sources = [];
while ( $row = $stmt->fetch() ) {
    $image_sources[ (int) $row['post_id'] ] = (string) $row['meta_value'];
}

$attachment_count = count( $image_sources );
print sprintf( '%d attachment%s found', $attachment_count, 1 !== $attachment_count ? 's' : '' ) . "\n";
/**
 * Main de-duplication loop.
 *
 * For every attachment in $image_sources (post ID => file path):
 *  1. skip it unless it is an image and its name looks like a duplicate ("-N" suffix);
 *  2. look up the attachment whose path is the same name without the suffix (the "root");
 *  3. repoint `_thumbnail_id` and `_product_image_gallery` references from the duplicate
 *     to the root;
 *  4. delete the duplicate through WP-CLI and remove its leftover resized files.
 *
 * After every $batch_size processed entries the script pauses for five seconds.
 */
$count = 0;

// Prepare the per-duplicate statements once; only the bound values change per iteration.
$update_thumbnail_stmt = $pdo->prepare(
    "UPDATE {$table_prefix}postmeta
    SET meta_value = ?
    WHERE meta_value = ?
    AND meta_key = '_thumbnail_id'"
);
$find_galleries_stmt   = $pdo->prepare(
    "SELECT meta_id, meta_value
    FROM {$table_prefix}postmeta
    WHERE meta_key = '_product_image_gallery'
    AND meta_value LIKE ?"
);
$update_gallery_stmt   = $pdo->prepare(
    "UPDATE {$table_prefix}postmeta
    SET meta_value = ?
    WHERE meta_id = ?
    LIMIT 1"
);

foreach ( $image_sources as $image_post_id => $image_source ) {
    $count++;
    if ( $count > $batch_size ) {
        print 'Batch Complete; Pausing' . "\n";
        sleep( 5 );
        print 'Resuming...' . "\n";
        $count = 0;
    }

    /** If not an image, skip. */
    if ( ! is_image( $image_source ) ) {
        print sprintf( '%d: Not an Image. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    /** If not a duplicate candidate, skip. */
    if ( ! is_duplicate_candidate( $image_source ) ) {
        print sprintf( '%d: Not a Duplicate Candidate. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    /** @var string $root_image Root Image that can be used for comparison. */
    $root_image = get_root( $image_source );

    /**
     * Get the root image ID if it exists in the array. Strict comparison so that
     * only an identical path counts as the root.
     */
    $root_image_post_id = array_search( $root_image, $image_sources, true );

    /**
     * If there is no root image or the root image is the current image,
     * don't delete it.
     */
    if ( false === $root_image_post_id || $root_image_post_id === $image_post_id ) {
        print sprintf( '%d: Original. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    // meta_value is compared and stored as text, so bind the IDs as strings.
    $duplicate_id = (string) $image_post_id;
    $root_id      = (string) $root_image_post_id;

    /**
     * We have a successful duplicate image candidate;
     * first, update the thumbnail.
     */
    $update_thumbnail_stmt->execute( [ $root_id, $duplicate_id ] );

    /**
     * Update gallery images. These are comma separated, so LIKE is only a coarse
     * pre-filter (28 also matches 5228); the exact match is done per list entry below.
     */
    $find_galleries_stmt->execute( [ '%' . $duplicate_id . '%' ] );

    foreach ( $find_galleries_stmt->fetchAll() as $gallery_row ) {
        $meta_id           = $gallery_row['meta_id'];
        $gallery_image_ids = explode( ',', (string) $gallery_row['meta_value'] );
        $gallery_changed   = false;

        // Replace whole entries only, never a substring of another ID.
        foreach ( $gallery_image_ids as $position => $gallery_image_id ) {
            if ( trim( $gallery_image_id ) === $duplicate_id ) {
                $gallery_image_ids[ $position ] = $root_id;
                $gallery_changed                = true;
            }
        }

        if ( ! $gallery_changed ) {
            continue;
        }

        $update_gallery_stmt->execute( [ implode( ',', $gallery_image_ids ), $meta_id ] );
        print sprintf( 'Updating gallery images for %d, from: %d, to: %d', $meta_id, $image_post_id, $root_image_post_id ) . "\n";
    }

    print sprintf( 'Deleting %d: %s', $image_post_id, $image_source ) . "\n";

    $command        = sprintf(
        'wp post delete --force %s --allow-root --skip-plugins --skip-themes',
        escapeshellarg( $duplicate_id )
    );
    $command_output = [];
    $exit_code      = 0;
    exec( $command, $command_output, $exit_code );

    // Only clean up files when WP-CLI reports success.
    if ( 0 !== $exit_code ) {
        print sprintf( '%d: Delete failed (exit code %d). Leaving files in place: %s', $image_post_id, $exit_code, $image_source ) . "\n";
        continue;
    }

    remove_media_images( $image_source );

    // The attachment is gone; make sure it can no longer be picked as the root of a
    // later duplicate. foreach iterates over a snapshot, so this is safe here.
    unset( $image_sources[ $image_post_id ] );
}

print 'De-Deuper Process Complete' . "\n";
exit( 0 );
/**
 * Determines whether a file path has one of the supported image extensions.
 *
 * The check is made on the real file extension (the part after the last dot of the
 * file name), case-insensitively. Supported extensions: jpg, jpeg, png, svg.
 *
 * @param string $source Attachment path or file name.
 *
 * @return bool True when the extension is a supported image type, false otherwise
 *              (including empty or non-string input).
 */
function is_image( $source ): bool {
    if ( ! is_string( $source ) || '' === $source ) {
        return false;
    }

    $valid_image_extensions = [ 'jpg', 'jpeg', 'png', 'svg' ];

    // Compare the whole extension so that e.g. "clip.mpeg" or "notanimagejpg" are rejected.
    $extension = strtolower( pathinfo( $source, PATHINFO_EXTENSION ) );

    return in_array( $extension, $valid_image_extensions, true );
}
/**
 * Tells whether an image path is named like a numbered copy of another image.
 *
 * A candidate ends with a hyphen, a number that does not start with zero, a dot and
 * a three or four letter extension made of the letters j, p, g, n, s, v, e in any
 * case (for example "photo-1.jpg" or "photo-12.PNG"). Names such as "photo-0.jpg"
 * or "photo-01.jpg" are intentionally not candidates.
 *
 * @param string $source Image source path.
 *
 * @return bool True when the path matches the duplicate naming pattern.
 */
function is_duplicate_candidate( $source ): bool {
    $duplicate_pattern = "/-[1-9](\d+)?\.([jJpPgGnNsSvVeE]{3,4})$/";
    $match_count       = preg_match( $duplicate_pattern, $source );

    return 1 === $match_count;
}
/**
 * Derives the likely original file name of a numbered copy.
 *
 * Strips a trailing "-<digits>" that sits directly before the extension, so
 * "photo-2.jpg" becomes "photo.jpg". Paths without such a suffix are returned
 * unchanged.
 *
 * @param string $source Image source path.
 *
 * @return string|string[]|null The path without the numeric suffix, as returned by preg_replace().
 */
function get_root( $source ) {
    // $2 is the captured extension; the hyphen and the digits are dropped.
    return preg_replace( "/-(\d+)\.([jJpPgGnNsSvVeE]{1,4})$/", '.$2', $source );
}
/**
 * Removes leftover resized variants of an image from the uploads directory.
 *
 * Looks in the directory of `<this script's dir>/wp-content/uploads/<source>` for
 * files named `<base>-<width>x<height>.<ext>`, where `<base>` is the file name of
 * $source without its extension, and unlinks them. The image itself is not touched.
 * Prints a summary line when at least one file was removed.
 *
 * @param string $source Attachment path relative to the uploads directory.
 *
 * @return void
 */
function remove_media_images( $source ) {
    $directory = dirname( __DIR__ . '/wp-content/uploads/' . $source );

    // Nothing to clean up when the directory is missing or unreadable.
    if ( ! is_dir( $directory ) ) {
        return;
    }

    $files = scandir( $directory );
    if ( ! $files ) {
        return;
    }

    $file_base = (string) preg_replace( "/\.([jJpPgGnNsSvVeE]{1,4})$/", '', basename( $source ) );

    // Anchor the base name at the start of the file name; stripping it with
    // str_replace() would also match files that merely contain the base somewhere.
    $variant_pattern = '/^' . preg_quote( $file_base, '/' ) . '-\d+x\d+\.[A-Za-z]{3,4}$/';

    $removed_count = 0;
    foreach ( $files as $file ) {
        if ( 1 !== preg_match( $variant_pattern, $file ) ) {
            continue;
        }

        $variant_path = sprintf( '%s/%s', $directory, $file );

        // Count only the files that were really removed.
        if ( is_file( $variant_path ) && unlink( $variant_path ) ) {
            $removed_count++;
        }
    }

    if ( $removed_count > 0 ) {
        print sprintf( 'Removed %d rogue media variants', $removed_count ) . "\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment