Last active
August 29, 2015 14:27
-
-
Save omerida/ab4f8ede25263b6e18cd to your computer and use it in GitHub Desktop.
Helper functions for rewriting images in body fields from remote to local
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Updates remote image tags to work locally.
 *
 * Images hosted on 'example.com' or 'example.org' are located with
 * _extractImages(), downloaded with _import_images(), and the matching
 * src attributes inside <img> tags of $body are replaced with the path
 * of the imported file.
 *
 * @param string $body HTML fragment that may reference remote images
 * @return string the fragment with matching src attributes rewritten,
 *                or the input unchanged when nothing needs rewriting
 */
function _rewrite_images($body)
{
    $allowed = array('example.com', 'example.org');

    // quick test before we commit to DOM processing:
    // there must be an image tag AND a mention of one of our old
    // domains, otherwise nothing can match.
    // this assumes all images are full URIs so adjust accordingly
    if (false === strpos($body, 'img')
        || (false === strpos($body, 'example.com')
            && false === strpos($body, 'example.org'))) {
        return $body;
    }

    // look for actual image tags
    $images = _extractImages($body);
    if (!$images) {
        return $body;
    }

    // keep only images from our domains
    $images = array_filter(
        $images,
        function ($image) use ($allowed) {
            $host = parse_url($image['src'], PHP_URL_HOST);
            return is_string($host) && in_array($host, $allowed, true);
        }
    );
    if (!$images) {
        return $body;
    }

    // import remaining to drupal files; the map is keyed by the
    // original (absolute) src of each image
    $map = _import_images($images);
    if (!$map) {
        return $body;
    }

    // prepare url replacements
    $replacements = array();
    foreach ($map as $src => $file) {
        $file->url = file_create_url($file->uri);
        $file->parts = parse_url($file->url);

        // map old src attributes to new values
        $local = 'src="' . str_replace('//', '/', $file->parts['path']) . '"';
        $replacements['src="' . $src . '"'] = $local;

        // _extractImages() reports protocol-relative sources
        // ("//host/a.png") as "http://host/a.png", so also match the
        // protocol-relative spelling as it may appear in the body
        if (0 === strpos($src, 'http://')) {
            $replacements['src="' . substr($src, 5) . '"'] = $local;
        }
    }

    // update references in body, touching only text inside <img> tags
    $search = array_keys($replacements);
    $replace = array_values($replacements);
    $updated = preg_replace_callback(
        '/(\<img[^>]+\>)/',
        function ($match) use ($search, $replace) {
            return str_replace($search, $replace, $match[1]);
        },
        $body
    );

    // preg_replace_callback() returns null on a PCRE error; never
    // replace the body with null in that case
    return null === $updated ? $body : $updated;
}
/**
 * extractImages
 *
 * Parses an HTML fragment and collects the src of every <img> tag that
 * ends in a png/jpg/jpeg/gif extension and is reachable over http(s).
 * Protocol-relative sources (starting with //) are reported with an
 * "http:" prefix.
 *
 * @param string $text HTML fragment to scan
 * @return array|bool list of array('src' => absolute URL), keyed by the
 *                    position of the tag among all <img> tags in the
 *                    fragment, or false when no usable image is found
 */
function _extractImages($text) {
    // force UTF-8 so the parser does not fall back to its default charset
    $header = '<meta http-equiv="Content-Type" '
        . 'content="text/html; charset=UTF-8" />';

    // collect libxml parse warnings internally instead of silencing
    // everything with @, then restore the caller's setting
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($header . $text);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return false;
    }

    $xpath = new DOMXPath($dom);
    $images = $xpath->evaluate('//img');
    if (!$images) {
        return false;
    }

    $result = array();
    foreach ($images as $i => $img) {
        $src = $img->getAttribute('src');

        // ignore if it doesn't end in an image file extension;
        // the dot is escaped so "/foopng" is not mistaken for a .png,
        // and the cast covers parse_url() returning null or false
        $path = (string) parse_url($src, PHP_URL_PATH);
        if (!preg_match('/\.(png|jpg|jpeg|gif)$/i', $path)) {
            continue;
        }

        // fix protocol-relative URLs (start with //)
        if (0 === strpos($src, '//')) {
            $src = 'http:' . $src;
        }

        // ignore if its not http(s)
        if (!preg_match('/^https?:\/\//', $src)) {
            continue;
        }

        $result[$i] = array('src' => $src);
    }

    return empty($result) ? false : $result;
}
/**
 * Take our array of found images and save them as
 * local files
 *
 * Each image is fetched with system_retrieve_file() into a location
 * under public://images that mirrors the path of its source URL (a
 * leading "/images/" in the source path is dropped so it is not
 * duplicated).
 *
 * @param array $images list of array('src' => absolute URL)
 * @return array successfully saved files, keyed by their original src
 */
function _import_images($images)
{
    // save them in images subdir
    $uri_base = 'public://images';

    // combine the flags with a bitwise OR; a logical "||" would
    // evaluate to boolean true and drop FILE_MODIFY_PERMISSIONS
    $perms = FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS;

    $map = array();
    foreach ($images as $image) {
        $path = parse_url($image['src'], PHP_URL_PATH);
        if (!is_string($path)) {
            // no usable path component, nothing to name the file after
            continue;
        }

        // strip a leading "/images/" and any remaining leading slash so
        // the target never contains "//" after the base directory
        $path = preg_replace('/^\/images\/(.+)/', '$1', $path);
        $path = ltrim($path, '/');
        if ('' === $path) {
            continue;
        }

        // make our target directory
        $dir = $uri_base . dirname('/' . $path);
        if (!file_prepare_directory($dir, $perms)) {
            continue;
        }

        $file = system_retrieve_file(
            $image['src'],
            $uri_base . '/' . $path,
            TRUE,
            FILE_EXISTS_REPLACE
        );
        if ($file) {
            // if saved, add it to our map to track
            // old sources to new location
            $map[$image['src']] = $file;
        }
    }

    return $map;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment