Last active
March 3, 2017 20:53
-
-
Save brooke-heaton/67d51e74304d00e6dbccc1650736fa15 to your computer and use it in GitHub Desktop.
Find and replace absolute Drupal 6 URLs with aliases so that the Migrate module can redirect them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Helper class that carries the alias map into the callback used by
 * preg_replace_callback(), so absolute D6 URLs can be replaced with D6 aliases.
 */
class InternalLinksConverter {

  /**
   * Map of internal path (e.g. "node/12") => URL alias, assigned by the caller.
   *
   * Declared explicitly so that assigning it is not a dynamic-property write
   * (deprecated as of PHP 8.2).
   *
   * @var array
   */
  public $link_aliases = array();

  /**
   * Callback for preg_replace_callback().
   *
   * @param array $matches
   *   Match array: index 0 is the full matched text, index 3 the internal
   *   path (slug) captured by the pattern.
   *
   * @return string
   *   The alias registered for the slug, or the matched text unchanged when
   *   no non-empty alias is known for it.
   */
  public function replace($matches) {
    $matched_text = isset($matches[0]) ? $matches[0] : '';
    $slug = isset($matches[3]) ? $matches[3] : '';

    // Tolerate a caller that assigned something other than an array.
    $aliases = is_array($this->link_aliases) ? $this->link_aliases : array();

    // No alias for this path: leave the original text untouched.
    if (empty($aliases[$slug])) {
      return $matched_text;
    }

    return $aliases[$slug];
  }

}
/**
 * Helper function that finds internal paths (node/..., taxonomy/term/... or
 * user/...) in a body of text, whether written as relative paths or prefixed
 * with the absolute site URL.
 *
 * @param string $text of the body field
 * @param boolean $is_string_return
 *   TRUE to get the slugs as one comma separated string, FALSE to get an
 *   array. Ignored for non-empty results when $capture_position is TRUE.
 * @param boolean $capture_position
 *   TRUE to get an array keyed by the byte offset of each slug within $text,
 *   with the lower-cased slug as the value.
 * @return string or array $result of any discovered permalinks ($slug) found.
 *   When nothing is found: '' if $is_string_return is TRUE, array() otherwise.
 *
 */
function ParseBodyLinks($text, $is_string_return = TRUE, $capture_position = FALSE) {
  // Group 1: optional absolute prefix, group 2: scheme, group 3: the slug.
  $pattern = "/((http|https):\/\/www\.website\.com\/)?(node\/\w+|taxonomy\/term\/\w+|user\/\w+)/";
  $flag = $capture_position ? PREG_OFFSET_CAPTURE : PREG_PATTERN_ORDER;
  $links_list = array();

  // Match against the text itself (no wrapper), so that captured offsets are
  // relative to $text.
  preg_match_all($pattern, (string) $text, $links_list, $flag);

  // No links have been found (or the match failed).
  if (empty($links_list[0])) {
    return $is_string_return ? '' : array();
  }

  if ($capture_position) {
    // array[position] = slug. Group 3 always participates in a match, unlike
    // the optional prefix in group 1, which reports offset -1 for relative
    // links.
    $result = array();
    foreach ($links_list[3] as $data) {
      $result[$data[1]] = strtolower($data[0]);
    }
    return $result;
  }

  return $is_string_return ? implode(',', $links_list[3]) : $links_list[3];
}
/**
 * Helper function to look up the path alias of each permalink slug in the
 * source database's url_alias table.
 *
 * @param array $slugs, should be string values (e.g. "node/12")
 * @return array - aliases keyed by slug (src => dst); an empty array when
 *   no slugs were given or none of them has an alias
 *
 */
protected function GetD6Aliases($slugs) {
  $aliases = array();

  // Nothing to look up; also avoids building an "IN ()" condition.
  if (empty($slugs) || !is_array($slugs)) {
    return $aliases;
  }

  // One query for all distinct slugs instead of one query per slug.
  $unique_slugs = array_values(array_unique($slugs));
  $results = Database::getConnection('default',
    $this->arguments['source_connection'])
    ->select('url_alias', 'ua')
    ->fields('ua')
    ->condition('ua.src', $unique_slugs, 'IN')
    ->execute();

  foreach ($results as $result) {
    $aliases[$result->src] = $result->dst;
  }

  // Always an array, as documented, so callers can safely count() it.
  return $aliases;
}
/**
 * Helper function that parses the body field for internal permalink paths,
 * looks up their aliases and replaces each path that has an alias with
 * that alias.
 *
 * @param string $text of the body field
 * @return string $text of the body field with permalinks
 * replaced, if found
 *
 */
protected function ProcessBodyLinks($text) {
  // Send to helper function to parse links and identify the internal paths.
  $slugs = ParseBodyLinks($text, FALSE);
  // No internal links found in text: return original text.
  if (empty($slugs) || !is_array($slugs)) {
    return $text;
  }

  // Look each distinct path up only once.
  $link_aliases = $this->GetD6Aliases(array_unique($slugs));
  // No aliases found: return original text. empty() is used rather than
  // sizeof() because the lookup may hand back a non-array empty value.
  if (empty($link_aliases) || !is_array($link_aliases)) {
    return $text;
  }

  // Create an object to carry the alias map into the replace callback.
  $replace_parameter = new InternalLinksConverter();
  $replace_parameter->link_aliases = $link_aliases;

  // Replace the absolute url (or bare internal path) with the alias.
  $pattern = "/((http|https):\/\/www\.website\.com\/)?(node\/\w+|taxonomy\/term\/\w+|user\/\w+)/";
  $replaced = preg_replace_callback($pattern, array($replace_parameter, 'replace'), $text);

  // preg_replace_callback() returns NULL on error; keep the original body
  // rather than wiping it out.
  return $replaced === NULL ? $text : $replaced;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment