Skip to content

Instantly share code, notes, and snippets.

@ChrisHardie
Created February 18, 2020 16:55
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ChrisHardie/f356b2705e6659f0ec9f0b826ca87650 to your computer and use it in GitHub Desktop.
Flickr-to-WordPress: a plugin that uses a WordPress-powered API to find and replace Flickr references in another WordPress site
<?php
/**
* Plugin Name: Flickr Fixer
* Plugin URI: https://chrishardie.com/
* Description: Find/replace Flickr references
* Version: 1.0
* Author: Chris Hardie
* Author URI: https://chrishardie.com/
*/
// Everything below is WP-CLI only: stop loading this file for normal web requests.
if ( ! ( defined( 'WP_CLI' ) && WP_CLI ) ) {
	return;
}
/**
 * WP-CLI command that rewrites Flickr links and images inside post content so
 * that they point at the matching entries on a WordPress-hosted photo site.
 */
class JCH_Flickr_Fixer extends WP_CLI_Command {

	/**
	 * Fix Flickr References
	 *
	 * ## OPTIONS
	 *
	 * [--post_id=<id>]
	 * : Specify an individual post ID to work with
	 *
	 * [--dry_run=false]
	 * : Actually update post content in a database write operation
	 *
	 * [--debug]
	 * : Display debug output
	 *
	 * @subcommand fix-refs [--dry_run=false] [--post_id=<id>] [--debug]
	 *
	 * @param array $args       Args.
	 * @param array $assoc_args Associative args.
	 */
	public function fix_refs( $args, $assoc_args ) {
		global $wpdb;

		// Get a specific post, or all the posts.
		if ( isset( $assoc_args['post_id'] ) && is_numeric( $assoc_args['post_id'] ) ) {
			WP_CLI::line( 'Getting selected post...' );
			$flickr_post_ids = $wpdb->get_col(
				$wpdb->prepare(
					"SELECT ID FROM {$wpdb->posts} WHERE post_type = 'post' AND ID = %d",
					(int) $assoc_args['post_id']
				)
			);
		} else {
			WP_CLI::line( 'Getting all posts containing Flickr references...' );
			$flickr_post_ids = $wpdb->get_col(
				$wpdb->prepare(
					"SELECT ID FROM {$wpdb->posts} WHERE post_type = 'post' AND ( post_content LIKE %s OR post_content LIKE %s )",
					'%' . $wpdb->esc_like( 'flickr.com/photos/chrishardie' ) . '%',
					'%' . $wpdb->esc_like( 'flickr.com/photos/11288301@N00' ) . '%'
				)
			);
		}

		$flickr_post_count = count( $flickr_post_ids );
		WP_CLI::line( 'Found ' . (int) $flickr_post_count . ' posts to process.' );

		if ( ! $flickr_post_count ) {
			WP_CLI::line( 'No work to do! Exiting.' );
			return;
		}

		$progress = \WP_CLI\Utils\make_progress_bar( 'Processing Flickr references', $flickr_post_count );

		// Keep track of all replacements across all posts.
		$replacements_made = 0;

		// Despite all the automation below, some references need special/manual handling.
		// The key is the string to find, the value is the string to replace it with.
		// The map never changes, so it is built once rather than once per post.
		$manual_find_replace = array(
			'11288301@N00'                                      => 'chrishardie', // Older references to my Flickr user ID.
			'http://www.flickr.com/photos/chrishardie/sets/'    => 'https://photos.chrishardie.com/flickrset/', // References to Flickr sets.
			'https://www.flickr.com/photos/chrishardie/sets/'   => 'https://photos.chrishardie.com/flickrset/',
			'http://www.flickr.com/photos/chrishardie/tags/'    => 'https://photos.chrishardie.com/tag/', // Flickr tags.
			'https://www.flickr.com/photos/chrishardie/tags/'   => 'https://photos.chrishardie.com/tag/',
			'href="http://www.flickr.com/photos/chrishardie"'   => 'href="https://photos.chrishardie.com/"', // Plain old links to my profile.
			'href="http://www.flickr.com/photos/chrishardie/"'  => 'href="https://photos.chrishardie.com/"',
		);

		// Work on 20 posts at a time.
		while ( $post_ids = array_splice( $flickr_post_ids, 0, 20 ) ) {
			// The IDs came from the database, but force them to integers before interpolating.
			$id_list = implode( ',', array_map( 'intval', $post_ids ) );
			$posts   = $wpdb->get_results( "SELECT ID, post_content FROM {$wpdb->posts} WHERE post_type = 'post' AND ID IN ({$id_list})" );

			if ( empty( $posts ) ) {
				// Nothing came back for this batch; keep going so the remaining batches are still processed.
				WP_CLI::warning( 'Could not load any posts for IDs: ' . $id_list );
				$progress->tick( count( $post_ids ) );
				continue;
			}

			foreach ( $posts as $the_post ) {
				// I couldn't get edit_post_link() to work. This is dumb.
				$admin_edit_url = admin_url( 'post.php?post=' ) . $the_post->ID . '&action=edit';
				WP_CLI::debug( "Working on Post ID: {$the_post->ID}: " . $admin_edit_url );

				// The untouched content is kept so we can tell afterwards whether anything really changed.
				$original_content = $the_post->post_content;
				$new_content      = $original_content;

				// Keep track of the number of Flickr references found in this post.
				$ref_count = 0;

				// Apply the manual replacements first.
				foreach ( $manual_find_replace as $find => $replace ) {
					$manual_replacements_made = 0;
					$new_content              = str_replace( $find, $replace, $new_content, $manual_replacements_made );
					if ( 0 < $manual_replacements_made ) {
						$ref_count++;
						$replacements_made += $manual_replacements_made;
					}
				}

				// Now for the fun stuff.
				// Find <a> references to a Flickr photo page, with or without an <img> inside.
				preg_match_all( '/(<a[^>]+?href="https?:\/\/www\.flickr\.com\/photos\/chrishardie\/\d+.+?<\/a>)/', $new_content, $refs_complex, PREG_SET_ORDER );

				foreach ( $refs_complex as $ref ) {
					$ref_count++;

					// Tag attributes can come in any order, so parse the fragment with DOMDocument
					// instead of regular expressions. The XML declaration prefix tells libxml the
					// fragment is UTF-8 (otherwise non-ASCII text is mangled), and parser warnings
					// are collected internally rather than being emitted as PHP warnings.
					$dom                   = new DOMDocument();
					$previous_libxml_state = libxml_use_internal_errors( true );
					$loaded                = $dom->loadHTML( '<?xml encoding="UTF-8">' . $ref[0] );
					libxml_clear_errors();
					libxml_use_internal_errors( $previous_libxml_state );

					if ( ! $loaded ) {
						WP_CLI::warning( 'Could not parse the matched link markup in ' . $admin_edit_url );
						continue;
					}

					$a_tags   = $dom->getElementsByTagName( 'a' );
					$img_tags = $dom->getElementsByTagName( 'img' );

					// We're only expecting one link and at most one image per match.
					if ( 1 < $a_tags->length || 1 < $img_tags->length ) {
						WP_CLI::warning( 'More than one a tag or img tag: ' . $admin_edit_url );
						continue;
					}

					$a_tag   = $a_tags->item( 0 );
					$img_tag = $img_tags->item( 0 ); // Null when the link has no image inside.

					if ( null === $a_tag ) {
						WP_CLI::warning( 'Could not find the a tag in the matched markup in ' . $admin_edit_url );
						continue;
					}

					// Extract the Flickr photo page URL, dropping any trailing "in/..." context segment.
					$flickr_link = preg_replace( '/(https?:\/\/www\.flickr\.com\/photos\/chrishardie\/\d+\/)(in\/\S+)?/', '$1', $a_tag->getAttribute( 'href' ) );

					// For our API lookup, add a trailing slash if there isn't one.
					$flickr_link = rtrim( $flickr_link, '/' ) . '/';

					// Pass along the img dimensions, if any, so the API can return an
					// appropriately sized image src instead of the full size one.
					$width  = ( null !== $img_tag ) ? $img_tag->getAttribute( 'width' ) : null;
					$height = ( null !== $img_tag ) ? $img_tag->getAttribute( 'height' ) : null;

					// Look it up on our WordPress photo site.
					$photo_data = $this->get_photo_post_by_flickr_id( $flickr_link, $width, $height );

					if ( ! $photo_data ) {
						WP_CLI::warning( 'Could not find a valid photo site post for ' . $flickr_link . ' in ' . $admin_edit_url );
						continue;
					}

					WP_CLI::debug( 'Found valid photo site post for Flickr URL.' );
					WP_CLI::debug( $flickr_link . ' --> ' . $photo_data->permalink );

					// Replace the link target.
					$a_tag->setAttribute( 'href', $photo_data->permalink );
					$fragment_replacements = 1;

					// If we're working with an img tag, update it too.
					if ( null !== $img_tag ) {
						if ( empty( $photo_data->thumbnail_url ) ) {
							WP_CLI::warning( 'No valid replacement image src url in ' . $admin_edit_url );
						} else {
							$old_img_src = $img_tag->getAttribute( 'src' );
							WP_CLI::debug( $old_img_src . ' --> ' . $photo_data->thumbnail_url );
							$img_tag->setAttribute( 'src', $photo_data->thumbnail_url );
							$fragment_replacements++;
						}
					}

					// Serialise only the <a> element (and its children), which avoids the
					// DOCTYPE/html/body wrappers DOMDocument adds around a full document.
					$final_html = $dom->saveHTML( $a_tag );

					if ( false === $final_html ) {
						WP_CLI::warning( 'Could not rebuild the link markup in ' . $admin_edit_url );
						continue;
					}

					// Update the content variable with the right HTML.
					$new_content        = str_replace( $ref[0], $final_html, $new_content );
					$replacements_made += $fragment_replacements;
				}

				// Find remaining references - usually a link on a line by itself for oembed use, or inside an <a> tag.
				// The optional "in/..." tail stops at whitespace, quotes and angle brackets so that a URL
				// sitting inside an attribute does not swallow the markup that follows it.
				preg_match_all( '/(https?:\/\/www\.flickr\.com\/photos\/chrishardie\/\d+\/)(in\/[^\s"\'<>]+)?/', $new_content, $refs_solo, PREG_SET_ORDER );

				foreach ( $refs_solo as $ref ) {
					$ref_count++;

					// The first capture group contains the Flickr photo page URL without any "in/..." tail.
					$found_flickr_ref = $ref[1];

					$photo_data = $this->get_photo_post_by_flickr_id( $found_flickr_ref );

					if ( ! $photo_data ) {
						WP_CLI::warning( 'Could not find a valid photo site post for ' . $found_flickr_ref . ' in ' . $admin_edit_url );
						continue;
					}

					WP_CLI::debug( 'Found valid photo site post for Flickr URL.' );
					WP_CLI::debug( $found_flickr_ref . ' --> ' . $photo_data->permalink );

					// Swap the full matched URL (including any tail) for the photo site permalink.
					$solo_replacements_made = 0;
					$new_content            = str_replace( $ref[0], $photo_data->permalink, $new_content, $solo_replacements_made );
					$replacements_made     += $solo_replacements_made;
				}

				if ( 0 === $ref_count ) {
					WP_CLI::warning( 'No references found in processing, probably something missing in ' . $admin_edit_url );
				} elseif ( $new_content === $original_content ) {
					// References were seen but none could be resolved; a write would only create a pointless revision.
					WP_CLI::debug( $ref_count . ' Flickr references found, but none could be replaced, so leaving content alone.' );
				} else {
					WP_CLI::debug( $ref_count . ' Flickr references found, so updating content...' );

					// Only do an actual database update if the dry_run=false flag is passed.
					if ( ! empty( $assoc_args['dry_run'] ) && 'false' === $assoc_args['dry_run'] ) {
						$post_to_update = array(
							'ID'           => $the_post->ID,
							'post_content' => $new_content,
						);

						// Ask for a WP_Error on failure; by default wp_update_post() returns 0,
						// which is_wp_error() would never catch.
						$update_result = wp_update_post( $post_to_update, true );

						if ( is_wp_error( $update_result ) ) {
							WP_CLI::error( 'There was a problem updating the post content. ' . $update_result->get_error_message() );
						} else {
							WP_CLI::debug( 'Post successfully updated.' );
							clean_post_cache( $the_post->ID );
							WP_CLI::line( 'Updated post: ' . get_the_permalink( $the_post->ID ) );
						}
					} else {
						WP_CLI::debug( 'Dry run only, so not actually making DB changes.' );
					}
				}

				$progress->tick();
			}

			// Do some memory cleanup so we don't lose control.
			self::stop_the_insanity();
		}

		$progress->finish();
		WP_CLI::success( $replacements_made . ' replacement(s) made across all posts' );
	}

	/**
	 * Clear all of the caches for memory management
	 *
	 * Empties the saved query log on $wpdb and resets the in-memory arrays held by
	 * the global object cache, then calls the cache's __remoteset() method when the
	 * active cache implementation provides one.
	 */
	public static function stop_the_insanity() {
		/**
		 * @var \WP_Object_Cache $wp_object_cache
		 * @var \wpdb $wpdb
		 */
		global $wpdb, $wp_object_cache;

		$wpdb->queries = array(); // or define( 'WP_IMPORTING', true );

		if ( is_object( $wp_object_cache ) ) {
			$wp_object_cache->group_ops      = array();
			$wp_object_cache->stats          = array();
			$wp_object_cache->memcache_debug = array();
			$wp_object_cache->cache          = array();

			if ( method_exists( $wp_object_cache, '__remoteset' ) ) {
				$wp_object_cache->__remoteset(); // important
			}
		}
	}

	/**
	 * Look up the photo site post info via API using the Flickr URL and possibly width/height
	 *
	 * Successful lookups are stored in the object cache, keyed by the URL plus any
	 * numeric dimensions, so repeated references do not trigger repeated requests.
	 *
	 * @param string|null $flickr_url Flickr photo page URL to look up.
	 * @param mixed       $width      Optional image width; only sent when numeric.
	 * @param mixed       $height     Optional image height; only sent when numeric.
	 * @return object|false Decoded API response (with at least a permalink) when the photo
	 *                      was found, false otherwise.
	 */
	public function get_photo_post_by_flickr_id( $flickr_url = null, $width = null, $height = null ) {
		$cache_group  = 'flickr_fixer';
		$cache_expire = 7 * DAY_IN_SECONDS;

		if ( empty( $flickr_url ) ) {
			return false;
		}

		// Always use the SSL version, since that's what was provided in the Flickr site data export/import process.
		$flickr_url = preg_replace( '/^http:/i', 'https:', $flickr_url );

		$photo_site_api_url = 'https://my-wp-photo-website.com/wp-json/myphotos/v1/find-by-flickr-url/';
		$cache_key          = $flickr_url;

		// This is the main key we use to look up the presence of a Flickr photo on the new WordPress site.
		$photo_site_api_url = add_query_arg(
			array(
				'flickr-url' => $flickr_url,
			),
			$photo_site_api_url
		);

		// If we received a width, use it.
		if ( is_numeric( $width ) ) {
			$photo_site_api_url = add_query_arg(
				array(
					'width' => $width,
				),
				$photo_site_api_url
			);
			$cache_key         .= '--' . $width;
		}

		// If we received a height, use it.
		if ( is_numeric( $height ) ) {
			$photo_site_api_url = add_query_arg(
				array(
					'height' => $height,
				),
				$photo_site_api_url
			);
			$cache_key         .= '--' . $height;
		}

		// It makes sense to cache the API responses from our WordPress-hosted photo site, they'll rarely change.
		WP_CLI::debug( 'Checking cache for ' . $cache_key );
		$found      = false;
		$cache_data = wp_cache_get( $cache_key, $cache_group, false, $found );

		if ( ( true === $found ) && ! empty( $cache_data ) ) {
			WP_CLI::debug( 'Cache hit' );
			return $cache_data;
		}

		// If no cache hit, do an actual lookup via API.
		WP_CLI::debug( 'Fetching ' . $photo_site_api_url );
		$photo_site_request = wp_remote_get( $photo_site_api_url );

		if ( is_wp_error( $photo_site_request ) ) {
			return false;
		}

		$photo_site_response = wp_remote_retrieve_body( $photo_site_request );
		$photo_data          = json_decode( $photo_site_response, false );

		// Only accept a decoded object that reports a match and carries the permalink callers rely on.
		$is_match = is_object( $photo_data )
			&& isset( $photo_data->result )
			&& 'found' === $photo_data->result
			&& ! empty( $photo_data->permalink );

		if ( ! $is_match ) {
			return false;
		}

		// A failed cache write only costs a repeat request later, so warn instead of aborting the run.
		if ( false === wp_cache_set( $cache_key, $photo_data, $cache_group, $cache_expire ) ) {
			WP_CLI::warning( 'Failed to set the cache value for key ' . $cache_key );
		}

		return $photo_data;
	}
}
// Register this command: the class's public methods become subcommands of `wp flickr-fixer`.
WP_CLI::add_command( 'flickr-fixer', 'JCH_Flickr_Fixer' );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment