Skip to content

Instantly share code, notes, and snippets.

@pentagonal
Last active April 2, 2016 09:57
Show Gist options
  • Save pentagonal/4c2c168233c12ad54a29 to your computer and use it in GitHub Desktop.
Save pentagonal/4c2c168233c12ad54a29 to your computer and use it in GitHub Desktop.
Entities Non ASCII characters
<?php
/**
 * Convert every non-ASCII (multibyte UTF-8) character into a hexadecimal
 * numeric HTML entity, e.g. U+4E2D becomes `&#x4e2d;`.
 *
 * Arrays are processed element by element (recursively) and returned as an
 * array with the same keys. Objects have every property reported by
 * get_object_vars() converted in place; the same object is returned.
 * Any other value is cast to string before conversion.
 *
 * When $entities is true the whole text is first normalised with
 * htmlentities(html_entity_decode(...)), so characters that htmlentities()
 * turns into named entities keep that form and only the characters still
 * left as raw non-ASCII bytes become numeric entities.
 *
 * No optional extension (iconv, mbstring) is needed: the code point is
 * decoded from the UTF-8 bytes in plain PHP.
 *
 * @param string|array|object $string   the value whose multibyte characters are converted
 * @param boolean             $entities true to HTML-encode the text before converting
 * @return string|array|object the converted value, same kind as the input
 */
function multibyteEntities($string, $entities = true)
{
    if (is_array($string)) {
        foreach ($string as $key => $value) {
            $string[$key] = multibyteEntities($value, $entities);
        }
        return $string;
    }
    if (is_object($string)) {
        foreach (get_object_vars($string) as $key => $value) {
            $string->{$key} = multibyteEntities($value, $entities);
        }
        return $string;
    }

    $text = (string) $string;
    if ($entities) {
        // Normalise the WHOLE text before chunking: doing it per chunk would
        // double-encode an entity (e.g. "&amp;") cut by a chunk boundary.
        $text = htmlentities(html_entity_decode($text));
    }

    // Turns one matched UTF-8 character into its "&#x...;" entity.
    $toEntity = function ($match) {
        $char = $match[0];
        $width = strlen($char);
        // The lead byte carries ($width + 1) marker bits; keep only the payload.
        $codePoint = ord($char[0]) & (0xFF >> ($width + 1));
        for ($i = 1; $i < $width; $i++) {
            // Every continuation byte contributes its low six bits.
            $codePoint = ($codePoint << 6) | (ord($char[$i]) & 0x3F);
        }
        return sprintf('&#x%x;', $codePoint);
    };

    /**
     * The regex callback runs on pieces of at most 4096 bytes (4 KB) to keep
     * memory usage bounded on very long strings. Pieces always end on a
     * character boundary so no UTF-8 sequence is ever split in two.
     */
    $chunkSize = 4096;
    $length = strlen($text);
    $output = '';
    $offset = 0;
    while ($offset < $length) {
        $end = min($offset + $chunkSize, $length);
        // If the next chunk would start with a continuation byte (10xxxxxx)
        // we are inside a character: step back to its lead byte. A valid
        // UTF-8 character has at most three continuation bytes.
        for ($back = 0; $back < 3 && $end < $length && (ord($text[$end]) & 0xC0) === 0x80; $back++) {
            $end--;
        }
        $chunk = substr($text, $offset, $end - $offset);
        $converted = preg_replace_callback('/[\x{80}-\x{10FFFF}]/u', $toEntity, $chunk);
        // preg_replace_callback() yields null when PCRE rejects the subject
        // (for instance bytes that are not valid UTF-8); keep that chunk
        // unchanged instead of silently losing it.
        $output .= ($converted === null) ? $chunk : $converted;
        $offset = $end;
    }
    return $output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment