Skip to content

Instantly share code, notes, and snippets.

@omerida
Last active August 29, 2015 14:27
Show Gist options
  • Save omerida/ab4f8ede25263b6e18cd to your computer and use it in GitHub Desktop.
Save omerida/ab4f8ede25263b6e18cd to your computer and use it in GitHub Desktop.
Helper functions for rewriting images in body fields from remote to local
<?php
/**
 * Rewrites remote image tags in an HTML body so they work locally.
 *
 * Images hosted on the legacy domains ('example.com', 'example.org') are
 * imported through _import_images(), and every matching src="..." attribute
 * found inside an <img> tag is replaced with the path component of the
 * imported file's URL.
 *
 * This assumes all images are referenced by full (or protocol-relative) URIs
 * and that src attributes are double-quoted; adjust accordingly.
 *
 * @param string $body HTML fragment to process.
 * @return string The body with matching image sources rewritten, or the
 *   original body when nothing qualifies.
 */
function _rewrite_images($body)
{
    $allowed = array('example.com', 'example.org');

    // Quick test before we commit to DOM processing: the body must contain an
    // image tag AND mention at least one of our old domains.
    if (false === stripos($body, 'img')) {
        return $body;
    }
    $mentions_domain = false;
    foreach ($allowed as $domain) {
        if (false !== stripos($body, $domain)) {
            $mentions_domain = true;
            break;
        }
    }
    if (!$mentions_domain) {
        return $body;
    }

    // Look for actual image tags.
    $images = _extractImages($body);
    if (!$images) {
        return $body;
    }

    // Keep only images from our domains.
    $images = array_filter(
        $images,
        function ($image) use ($allowed) {
            return in_array(parse_url($image['src'], PHP_URL_HOST), $allowed, true);
        }
    );
    if (!$images) {
        return $body;
    }

    // Import the remaining images as Drupal files.
    $map = _import_images($images);

    // Prepare the attribute replacements: old src attribute => new src attribute.
    $replacements = array();
    foreach ($map as $old_src => $file) {
        $local_path = parse_url(file_create_url($file->uri), PHP_URL_PATH);
        if (!is_string($local_path) || '' === $local_path) {
            continue;
        }
        // Collapse doubled slashes in the local path.
        $new_attr = 'src="' . str_replace('//', '/', $local_path) . '"';
        $replacements['src="' . $old_src . '"'] = $new_attr;

        // _extractImages() reports protocol-relative sources (//host/path)
        // with an "http:" prefix, so also match the form that may actually
        // appear in the body.
        if (0 === strpos($old_src, 'http://')) {
            $replacements['src="' . substr($old_src, 5) . '"'] = $new_attr;
        }
    }
    if (!$replacements) {
        return $body;
    }

    // Update references in the body, touching only text inside <img> tags.
    $search = array_keys($replacements);
    $replace = array_values($replacements);
    $updated = preg_replace_callback(
        '/(\<img[^>]+\>)/',
        function ($match) use ($search, $replace) {
            return str_replace($search, $replace, $match[1]);
        },
        $body
    );

    // preg_replace_callback() yields NULL on a PCRE error; never lose the body.
    return null === $updated ? $body : $updated;
}
/**
 * Extracts the sources of <img> elements found in an HTML fragment.
 *
 * Only sources whose URL path ends in .png, .jpg, .jpeg or .gif
 * (case-insensitive) and whose scheme is http or https are kept.
 * Protocol-relative sources (//host/path) are reported with an explicit
 * "http:" scheme.
 *
 * @param string $text HTML fragment to scan.
 * @return array|bool Entries of the form array('src' => string), keyed by the
 *   image's position among all <img> elements in the fragment, or FALSE when
 *   no image qualifies or the fragment cannot be parsed.
 */
function _extractImages($text) {
    // Declare the fragment as UTF-8 to the HTML parser.
    $header = '<meta http-equiv="Content-Type" '
        . 'content="text/html; charset=UTF-8" />';

    $dom = new DOMDocument();

    // Collect parser warnings about sloppy markup internally instead of
    // silencing them with @, then restore the caller's libxml setting.
    $previous = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($header . $text);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return false;
    }

    $xpath = new DOMXPath($dom);
    $nodes = $xpath->evaluate('//img');
    if (!$nodes) {
        return false;
    }

    $result = array();
    foreach ($nodes as $i => $img) {
        $src = $img->getAttribute('src');

        // Ignore if the path doesn't end in an image file extension. The dot
        // is escaped so that e.g. "/apng" is not mistaken for "/a.png".
        $path = parse_url($src, PHP_URL_PATH);
        if (!is_string($path) || !preg_match('/\.(png|jpg|jpeg|gif)$/i', $path)) {
            continue;
        }

        // Fix protocol-relative URLs (start with //).
        if (0 === strpos($src, '//')) {
            $src = 'http:' . $src;
        }

        // Ignore if it's not http(s).
        if (!preg_match('/^https?:\/\//', $src)) {
            continue;
        }

        $result[$i] = array('src' => $src);
    }

    return empty($result) ? false : $result;
}
/**
 * Takes our array of found images and saves them as local files.
 *
 * Each image is stored below public://images using its URL path; a leading
 * "/images/" segment of that path is dropped so it is not duplicated in the
 * target location.
 *
 * @param array $images Entries of the form array('src' => absolute URL).
 * @return array Map of original src URL => the file value returned by
 *   system_retrieve_file(), containing only the images that were saved.
 */
function _import_images($images)
{
    // Save them in the images subdir.
    $uri_base = 'public://images';

    // Bitwise OR combines both flags. A logical || would collapse to TRUE and
    // silently drop the FILE_MODIFY_PERMISSIONS bit.
    $perms = FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS;

    $map = array();
    foreach ($images as $image) {
        $path = parse_url($image['src'], PHP_URL_PATH);
        if (!is_string($path) || '' === $path) {
            // Nothing to derive a target file name from.
            continue;
        }
        $path = preg_replace('/^\/images\/(.+)/', '$1', $path);

        // Make our target directory.
        $dir = $uri_base . dirname('/' . $path);
        if (!file_prepare_directory($dir, $perms)) {
            continue;
        }

        $file = system_retrieve_file(
            $image['src'],
            $uri_base . '/' . $path,
            TRUE,
            FILE_EXISTS_REPLACE
        );
        if ($file) {
            // If saved, add it to our map to track old sources to the new
            // location.
            $map[$image['src']] = $file;
        }
    }

    return $map;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment