Skip to content

Instantly share code, notes, and snippets.

@chx
Last active July 23, 2023 04:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chx/ce1d45398996bbadcaf0bd65a61f5902 to your computer and use it in GitHub Desktop.
Save chx/ce1d45398996bbadcaf0bd65a61f5902 to your computer and use it in GitHub Desktop.
<?php
// Generator script: computes a "letter => letter without combining marks"
// table and writes it, as a compact short-syntax PHP array literal, to map.php
// (relative path, so it lands in the current working directory).
//
// Note that the ICU object used here is not doing transliteration. The class
// is called Transliterator for historical reasons but runs general text
// transforms; see
// https://unicode-org.github.io/icu/userguide/transforms/general/#overview
// The rule set decomposes (NFD), removes nonspacing marks ([:Mn:]) and
// recomposes (NFC).
$transformation = \Transliterator::createFromRules(':: NFD; :: [:Mn:] Remove; :: NFC;');
if ($transformation === null) {
    // createFromRules() signals failure by returning NULL. Stop here with the
    // ICU error instead of failing later on an invalid callable.
    throw new \RuntimeException('Unable to create the ICU transform: ' . intl_get_error_message());
}

// Every code point in 0x80..0x2000 that matches \pL, keyed by itself so the
// source letter is still available as the array key after array_map().
$letters = preg_grep('/\pL/u', array_map('utf8', range(0x80, 0x2000)));
$letters = array_combine($letters, $letters);
$transformed = array_map([$transformation, 'transliterate'], $letters);

// Keep only the letters the transform actually changed.
$map = array_diff_assoc($transformed, $letters);
print count($map);

// Squeeze var_export()'s multi-line "array ( 'k' => 'v', )" output into
// "['k'=>'v',]" by dropping all spaces and newlines and swapping the
// delimiters.
$search = [' => ', 'array (', ')', ' ', "\n"];
$replace = ['=>', '[', ']', '', ''];
$written = file_put_contents("map.php", str_replace($search, $replace, var_export($map, TRUE)));
if ($written === false) {
    // Do not let a failed write pass silently after the count was printed.
    throw new \RuntimeException('Unable to write map.php');
}
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * @param int $num
 *   The code point to encode.
 *
 * @return string
 *   The one- to four-byte UTF-8 sequence for $num, or an empty string when
 *   $num is not a Unicode scalar value (negative, a UTF-16 surrogate in
 *   U+D800..U+DFFF, or greater than U+10FFFF).
 */
function utf8($num)
{
    // Values outside the Unicode scalar range have no valid UTF-8 form.
    // Without this guard a negative number would reach chr() and come back as
    // an arbitrary single byte, and surrogates would be encoded as ill-formed
    // three-byte sequences.
    if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) {
        return '';
    }
    // 1 byte: 0xxxxxxx.
    if ($num <= 0x7F) {
        return chr($num);
    }
    // 2 bytes: 110xxxxx 10xxxxxx.
    if ($num <= 0x7FF) {
        return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
    }
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    if ($num <= 0xFFFF) {
        return chr(($num >> 12) + 224)
            . chr((($num >> 6) & 63) + 128)
            . chr(($num & 63) + 128);
    }
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    return chr(($num >> 18) + 240)
        . chr((($num >> 12) & 63) + 128)
        . chr((($num >> 6) & 63) + 128)
        . chr(($num & 63) + 128);
}
<?php
namespace core\lib\Drupal\Component\Transliteration;
/**
 * Removes diacritical marks from letters by table lookup.
 *
 * The work is a single strtr() call against the MAP constant, so no PHP
 * extension is needed at run time. Characters that are not keys of MAP are
 * returned unchanged.
 */
class RemoveDiacritics {

  // The table is intentionally dense; coding-standard checks are switched off
  // for it and re-enabled right after.
  // phpcs:disable
  /**
   * Lookup table: letter => the same letter without its combining marks.
   *
   * Lines are broken only between entries. A line break inside a quoted key
   * or value would become part of that string and silently corrupt the entry.
   *
   * Keys that are visually indistinguishable from another character (the
   * Greek numeral sign, the prosgegrammeni and the Greek Extended "oxia"
   * letters) are written as \u{...} escapes so that an editor or a Unicode
   * normalizer cannot merge them with their look-alikes.
   */
  protected const MAP = [
    // Latin.
    'À'=>'A','Á'=>'A','Â'=>'A','Ã'=>'A','Ä'=>'A','Å'=>'A','Ç'=>'C','È'=>'E','É'=>'E','Ê'=>'E','Ë'=>'E','Ì'=>'I','Í'=>'I','Î'=>'I','Ï'=>'I',
    'Ñ'=>'N','Ò'=>'O','Ó'=>'O','Ô'=>'O','Õ'=>'O','Ö'=>'O','Ù'=>'U','Ú'=>'U','Û'=>'U','Ü'=>'U','Ý'=>'Y',
    'à'=>'a','á'=>'a','â'=>'a','ã'=>'a','ä'=>'a','å'=>'a','ç'=>'c','è'=>'e','é'=>'e','ê'=>'e','ë'=>'e','ì'=>'i','í'=>'i','î'=>'i','ï'=>'i',
    'ñ'=>'n','ò'=>'o','ó'=>'o','ô'=>'o','õ'=>'o','ö'=>'o','ù'=>'u','ú'=>'u','û'=>'u','ü'=>'u','ý'=>'y','ÿ'=>'y',
    'Ā'=>'A','ā'=>'a','Ă'=>'A','ă'=>'a','Ą'=>'A','ą'=>'a','Ć'=>'C','ć'=>'c','Ĉ'=>'C','ĉ'=>'c','Ċ'=>'C','ċ'=>'c','Č'=>'C','č'=>'c','Ď'=>'D','ď'=>'d',
    'Ē'=>'E','ē'=>'e','Ĕ'=>'E','ĕ'=>'e','Ė'=>'E','ė'=>'e','Ę'=>'E','ę'=>'e','Ě'=>'E','ě'=>'e',
    'Ĝ'=>'G','ĝ'=>'g','Ğ'=>'G','ğ'=>'g','Ġ'=>'G','ġ'=>'g','Ģ'=>'G','ģ'=>'g','Ĥ'=>'H','ĥ'=>'h',
    'Ĩ'=>'I','ĩ'=>'i','Ī'=>'I','ī'=>'i','Ĭ'=>'I','ĭ'=>'i','Į'=>'I','į'=>'i','İ'=>'I','Ĵ'=>'J','ĵ'=>'j','Ķ'=>'K','ķ'=>'k',
    'Ĺ'=>'L','ĺ'=>'l','Ļ'=>'L','ļ'=>'l','Ľ'=>'L','ľ'=>'l','Ń'=>'N','ń'=>'n','Ņ'=>'N','ņ'=>'n','Ň'=>'N','ň'=>'n',
    'Ō'=>'O','ō'=>'o','Ŏ'=>'O','ŏ'=>'o','Ő'=>'O','ő'=>'o','Ŕ'=>'R','ŕ'=>'r','Ŗ'=>'R','ŗ'=>'r','Ř'=>'R','ř'=>'r',
    'Ś'=>'S','ś'=>'s','Ŝ'=>'S','ŝ'=>'s','Ş'=>'S','ş'=>'s','Š'=>'S','š'=>'s','Ţ'=>'T','ţ'=>'t','Ť'=>'T','ť'=>'t',
    'Ũ'=>'U','ũ'=>'u','Ū'=>'U','ū'=>'u','Ŭ'=>'U','ŭ'=>'u','Ů'=>'U','ů'=>'u','Ű'=>'U','ű'=>'u','Ų'=>'U','ų'=>'u',
    'Ŵ'=>'W','ŵ'=>'w','Ŷ'=>'Y','ŷ'=>'y','Ÿ'=>'Y','Ź'=>'Z','ź'=>'z','Ż'=>'Z','ż'=>'z','Ž'=>'Z','ž'=>'z',
    'Ơ'=>'O','ơ'=>'o','Ư'=>'U','ư'=>'u','Ǎ'=>'A','ǎ'=>'a','Ǐ'=>'I','ǐ'=>'i','Ǒ'=>'O','ǒ'=>'o','Ǔ'=>'U','ǔ'=>'u',
    'Ǖ'=>'U','ǖ'=>'u','Ǘ'=>'U','ǘ'=>'u','Ǚ'=>'U','ǚ'=>'u','Ǜ'=>'U','ǜ'=>'u','Ǟ'=>'A','ǟ'=>'a','Ǡ'=>'A','ǡ'=>'a','Ǣ'=>'Æ','ǣ'=>'æ',
    'Ǧ'=>'G','ǧ'=>'g','Ǩ'=>'K','ǩ'=>'k','Ǫ'=>'O','ǫ'=>'o','Ǭ'=>'O','ǭ'=>'o','Ǯ'=>'Ʒ','ǯ'=>'ʒ','ǰ'=>'j','Ǵ'=>'G','ǵ'=>'g','Ǹ'=>'N','ǹ'=>'n',
    'Ǻ'=>'A','ǻ'=>'a','Ǽ'=>'Æ','ǽ'=>'æ','Ǿ'=>'Ø','ǿ'=>'ø',
    'Ȁ'=>'A','ȁ'=>'a','Ȃ'=>'A','ȃ'=>'a','Ȅ'=>'E','ȅ'=>'e','Ȇ'=>'E','ȇ'=>'e','Ȉ'=>'I','ȉ'=>'i','Ȋ'=>'I','ȋ'=>'i',
    'Ȍ'=>'O','ȍ'=>'o','Ȏ'=>'O','ȏ'=>'o','Ȑ'=>'R','ȑ'=>'r','Ȓ'=>'R','ȓ'=>'r','Ȕ'=>'U','ȕ'=>'u','Ȗ'=>'U','ȗ'=>'u',
    'Ș'=>'S','ș'=>'s','Ț'=>'T','ț'=>'t','Ȟ'=>'H','ȟ'=>'h','Ȧ'=>'A','ȧ'=>'a','Ȩ'=>'E','ȩ'=>'e',
    'Ȫ'=>'O','ȫ'=>'o','Ȭ'=>'O','ȭ'=>'o','Ȯ'=>'O','ȯ'=>'o','Ȱ'=>'O','ȱ'=>'o','Ȳ'=>'Y','ȳ'=>'y',
    // Greek. The first entry maps U+0374 (Greek numeral sign) to U+02B9.
    "\u{0374}"=>"\u{02B9}",
    'Ά'=>'Α','Έ'=>'Ε','Ή'=>'Η','Ί'=>'Ι','Ό'=>'Ο','Ύ'=>'Υ','Ώ'=>'Ω','ΐ'=>'ι','Ϊ'=>'Ι','Ϋ'=>'Υ',
    'ά'=>'α','έ'=>'ε','ή'=>'η','ί'=>'ι','ΰ'=>'υ','ϊ'=>'ι','ϋ'=>'υ','ό'=>'ο','ύ'=>'υ','ώ'=>'ω','ϓ'=>'ϒ','ϔ'=>'ϒ',
    // Cyrillic.
    'Ѐ'=>'Е','Ё'=>'Е','Ѓ'=>'Г','Ї'=>'І','Ќ'=>'К','Ѝ'=>'И','Ў'=>'У','Й'=>'И','й'=>'и',
    'ѐ'=>'е','ё'=>'е','ѓ'=>'г','ї'=>'і','ќ'=>'к','ѝ'=>'и','ў'=>'у','Ѷ'=>'Ѵ','ѷ'=>'ѵ',
    'Ӂ'=>'Ж','ӂ'=>'ж','Ӑ'=>'А','ӑ'=>'а','Ӓ'=>'А','ӓ'=>'а','Ӗ'=>'Е','ӗ'=>'е','Ӛ'=>'Ә','ӛ'=>'ә','Ӝ'=>'Ж','ӝ'=>'ж','Ӟ'=>'З','ӟ'=>'з',
    'Ӣ'=>'И','ӣ'=>'и','Ӥ'=>'И','ӥ'=>'и','Ӧ'=>'О','ӧ'=>'о','Ӫ'=>'Ө','ӫ'=>'ө','Ӭ'=>'Э','ӭ'=>'э',
    'Ӯ'=>'У','ӯ'=>'у','Ӱ'=>'У','ӱ'=>'у','Ӳ'=>'У','ӳ'=>'у','Ӵ'=>'Ч','ӵ'=>'ч','Ӹ'=>'Ы','ӹ'=>'ы',
    // Arabic.
    'آ'=>'ا','أ'=>'ا','ؤ'=>'و','إ'=>'ا','ئ'=>'ي','ۀ'=>'ە','ۂ'=>'ہ','ۓ'=>'ے',
    // Devanagari, Bengali, Gurmukhi, Oriya.
    'ऩ'=>'न','ऱ'=>'र','ऴ'=>'ळ','क़'=>'क','ख़'=>'ख','ग़'=>'ग','ज़'=>'ज','ड़'=>'ड','ढ़'=>'ढ','फ़'=>'फ','य़'=>'य',
    'ড়'=>'ড','ঢ়'=>'ঢ','য়'=>'য',
    'ਲ਼'=>'ਲ','ਸ਼'=>'ਸ','ਖ਼'=>'ਖ','ਗ਼'=>'ਗ','ਜ਼'=>'ਜ','ਫ਼'=>'ਫ',
    'ଡ଼'=>'ଡ','ଢ଼'=>'ଢ',
    // Tibetan, Myanmar.
    'གྷ'=>'ག','ཌྷ'=>'ཌ','དྷ'=>'ད','བྷ'=>'བ','ཛྷ'=>'ཛ','ཀྵ'=>'ཀ','ဦ'=>'ဥ',
    // Latin Extended Additional.
    'Ḁ'=>'A','ḁ'=>'a','Ḃ'=>'B','ḃ'=>'b','Ḅ'=>'B','ḅ'=>'b','Ḇ'=>'B','ḇ'=>'b','Ḉ'=>'C','ḉ'=>'c',
    'Ḋ'=>'D','ḋ'=>'d','Ḍ'=>'D','ḍ'=>'d','Ḏ'=>'D','ḏ'=>'d','Ḑ'=>'D','ḑ'=>'d','Ḓ'=>'D','ḓ'=>'d',
    'Ḕ'=>'E','ḕ'=>'e','Ḗ'=>'E','ḗ'=>'e','Ḙ'=>'E','ḙ'=>'e','Ḛ'=>'E','ḛ'=>'e','Ḝ'=>'E','ḝ'=>'e',
    'Ḟ'=>'F','ḟ'=>'f','Ḡ'=>'G','ḡ'=>'g','Ḣ'=>'H','ḣ'=>'h','Ḥ'=>'H','ḥ'=>'h','Ḧ'=>'H','ḧ'=>'h','Ḩ'=>'H','ḩ'=>'h','Ḫ'=>'H','ḫ'=>'h',
    'Ḭ'=>'I','ḭ'=>'i','Ḯ'=>'I','ḯ'=>'i','Ḱ'=>'K','ḱ'=>'k','Ḳ'=>'K','ḳ'=>'k','Ḵ'=>'K','ḵ'=>'k',
    'Ḷ'=>'L','ḷ'=>'l','Ḹ'=>'L','ḹ'=>'l','Ḻ'=>'L','ḻ'=>'l','Ḽ'=>'L','ḽ'=>'l','Ḿ'=>'M','ḿ'=>'m','Ṁ'=>'M','ṁ'=>'m','Ṃ'=>'M','ṃ'=>'m',
    'Ṅ'=>'N','ṅ'=>'n','Ṇ'=>'N','ṇ'=>'n','Ṉ'=>'N','ṉ'=>'n','Ṋ'=>'N','ṋ'=>'n',
    'Ṍ'=>'O','ṍ'=>'o','Ṏ'=>'O','ṏ'=>'o','Ṑ'=>'O','ṑ'=>'o','Ṓ'=>'O','ṓ'=>'o','Ṕ'=>'P','ṕ'=>'p','Ṗ'=>'P','ṗ'=>'p',
    'Ṙ'=>'R','ṙ'=>'r','Ṛ'=>'R','ṛ'=>'r','Ṝ'=>'R','ṝ'=>'r','Ṟ'=>'R','ṟ'=>'r',
    'Ṡ'=>'S','ṡ'=>'s','Ṣ'=>'S','ṣ'=>'s','Ṥ'=>'S','ṥ'=>'s','Ṧ'=>'S','ṧ'=>'s','Ṩ'=>'S','ṩ'=>'s',
    'Ṫ'=>'T','ṫ'=>'t','Ṭ'=>'T','ṭ'=>'t','Ṯ'=>'T','ṯ'=>'t','Ṱ'=>'T','ṱ'=>'t',
    'Ṳ'=>'U','ṳ'=>'u','Ṵ'=>'U','ṵ'=>'u','Ṷ'=>'U','ṷ'=>'u','Ṹ'=>'U','ṹ'=>'u','Ṻ'=>'U','ṻ'=>'u','Ṽ'=>'V','ṽ'=>'v','Ṿ'=>'V','ṿ'=>'v',
    'Ẁ'=>'W','ẁ'=>'w','Ẃ'=>'W','ẃ'=>'w','Ẅ'=>'W','ẅ'=>'w','Ẇ'=>'W','ẇ'=>'w','Ẉ'=>'W','ẉ'=>'w',
    'Ẋ'=>'X','ẋ'=>'x','Ẍ'=>'X','ẍ'=>'x','Ẏ'=>'Y','ẏ'=>'y','Ẑ'=>'Z','ẑ'=>'z','Ẓ'=>'Z','ẓ'=>'z','Ẕ'=>'Z','ẕ'=>'z',
    'ẖ'=>'h','ẗ'=>'t','ẘ'=>'w','ẙ'=>'y','ẛ'=>'ſ',
    'Ạ'=>'A','ạ'=>'a','Ả'=>'A','ả'=>'a','Ấ'=>'A','ấ'=>'a','Ầ'=>'A','ầ'=>'a','Ẩ'=>'A','ẩ'=>'a','Ẫ'=>'A','ẫ'=>'a','Ậ'=>'A','ậ'=>'a',
    'Ắ'=>'A','ắ'=>'a','Ằ'=>'A','ằ'=>'a','Ẳ'=>'A','ẳ'=>'a','Ẵ'=>'A','ẵ'=>'a','Ặ'=>'A','ặ'=>'a',
    'Ẹ'=>'E','ẹ'=>'e','Ẻ'=>'E','ẻ'=>'e','Ẽ'=>'E','ẽ'=>'e','Ế'=>'E','ế'=>'e','Ề'=>'E','ề'=>'e','Ể'=>'E','ể'=>'e','Ễ'=>'E','ễ'=>'e','Ệ'=>'E','ệ'=>'e',
    'Ỉ'=>'I','ỉ'=>'i','Ị'=>'I','ị'=>'i',
    'Ọ'=>'O','ọ'=>'o','Ỏ'=>'O','ỏ'=>'o','Ố'=>'O','ố'=>'o','Ồ'=>'O','ồ'=>'o','Ổ'=>'O','ổ'=>'o','Ỗ'=>'O','ỗ'=>'o','Ộ'=>'O','ộ'=>'o',
    'Ớ'=>'O','ớ'=>'o','Ờ'=>'O','ờ'=>'o','Ở'=>'O','ở'=>'o','Ỡ'=>'O','ỡ'=>'o','Ợ'=>'O','ợ'=>'o',
    'Ụ'=>'U','ụ'=>'u','Ủ'=>'U','ủ'=>'u','Ứ'=>'U','ứ'=>'u','Ừ'=>'U','ừ'=>'u','Ử'=>'U','ử'=>'u','Ữ'=>'U','ữ'=>'u','Ự'=>'U','ự'=>'u',
    'Ỳ'=>'Y','ỳ'=>'y','Ỵ'=>'Y','ỵ'=>'y','Ỷ'=>'Y','ỷ'=>'y','Ỹ'=>'Y','ỹ'=>'y',
    // Greek Extended.
    'ἀ'=>'α','ἁ'=>'α','ἂ'=>'α','ἃ'=>'α','ἄ'=>'α','ἅ'=>'α','ἆ'=>'α','ἇ'=>'α',
    'Ἀ'=>'Α','Ἁ'=>'Α','Ἂ'=>'Α','Ἃ'=>'Α','Ἄ'=>'Α','Ἅ'=>'Α','Ἆ'=>'Α','Ἇ'=>'Α',
    'ἐ'=>'ε','ἑ'=>'ε','ἒ'=>'ε','ἓ'=>'ε','ἔ'=>'ε','ἕ'=>'ε',
    'Ἐ'=>'Ε','Ἑ'=>'Ε','Ἒ'=>'Ε','Ἓ'=>'Ε','Ἔ'=>'Ε','Ἕ'=>'Ε',
    'ἠ'=>'η','ἡ'=>'η','ἢ'=>'η','ἣ'=>'η','ἤ'=>'η','ἥ'=>'η','ἦ'=>'η','ἧ'=>'η',
    'Ἠ'=>'Η','Ἡ'=>'Η','Ἢ'=>'Η','Ἣ'=>'Η','Ἤ'=>'Η','Ἥ'=>'Η','Ἦ'=>'Η','Ἧ'=>'Η',
    'ἰ'=>'ι','ἱ'=>'ι','ἲ'=>'ι','ἳ'=>'ι','ἴ'=>'ι','ἵ'=>'ι','ἶ'=>'ι','ἷ'=>'ι',
    'Ἰ'=>'Ι','Ἱ'=>'Ι','Ἲ'=>'Ι','Ἳ'=>'Ι','Ἴ'=>'Ι','Ἵ'=>'Ι','Ἶ'=>'Ι','Ἷ'=>'Ι',
    'ὀ'=>'ο','ὁ'=>'ο','ὂ'=>'ο','ὃ'=>'ο','ὄ'=>'ο','ὅ'=>'ο',
    'Ὀ'=>'Ο','Ὁ'=>'Ο','Ὂ'=>'Ο','Ὃ'=>'Ο','Ὄ'=>'Ο','Ὅ'=>'Ο',
    'ὐ'=>'υ','ὑ'=>'υ','ὒ'=>'υ','ὓ'=>'υ','ὔ'=>'υ','ὕ'=>'υ','ὖ'=>'υ','ὗ'=>'υ',
    'Ὑ'=>'Υ','Ὓ'=>'Υ','Ὕ'=>'Υ','Ὗ'=>'Υ',
    'ὠ'=>'ω','ὡ'=>'ω','ὢ'=>'ω','ὣ'=>'ω','ὤ'=>'ω','ὥ'=>'ω','ὦ'=>'ω','ὧ'=>'ω',
    'Ὠ'=>'Ω','Ὡ'=>'Ω','Ὢ'=>'Ω','Ὣ'=>'Ω','Ὤ'=>'Ω','Ὥ'=>'Ω','Ὦ'=>'Ω','Ὧ'=>'Ω',
    // U+1F70..U+1F7D: the odd code points are the "oxia" look-alikes of the
    // tonos letters listed under Greek above.
    'ὰ'=>'α',"\u{1F71}"=>'α','ὲ'=>'ε',"\u{1F73}"=>'ε','ὴ'=>'η',"\u{1F75}"=>'η','ὶ'=>'ι',"\u{1F77}"=>'ι',
    'ὸ'=>'ο',"\u{1F79}"=>'ο','ὺ'=>'υ',"\u{1F7B}"=>'υ','ὼ'=>'ω',"\u{1F7D}"=>'ω',
    'ᾀ'=>'α','ᾁ'=>'α','ᾂ'=>'α','ᾃ'=>'α','ᾄ'=>'α','ᾅ'=>'α','ᾆ'=>'α','ᾇ'=>'α',
    'ᾈ'=>'Α','ᾉ'=>'Α','ᾊ'=>'Α','ᾋ'=>'Α','ᾌ'=>'Α','ᾍ'=>'Α','ᾎ'=>'Α','ᾏ'=>'Α',
    'ᾐ'=>'η','ᾑ'=>'η','ᾒ'=>'η','ᾓ'=>'η','ᾔ'=>'η','ᾕ'=>'η','ᾖ'=>'η','ᾗ'=>'η',
    'ᾘ'=>'Η','ᾙ'=>'Η','ᾚ'=>'Η','ᾛ'=>'Η','ᾜ'=>'Η','ᾝ'=>'Η','ᾞ'=>'Η','ᾟ'=>'Η',
    'ᾠ'=>'ω','ᾡ'=>'ω','ᾢ'=>'ω','ᾣ'=>'ω','ᾤ'=>'ω','ᾥ'=>'ω','ᾦ'=>'ω','ᾧ'=>'ω',
    'ᾨ'=>'Ω','ᾩ'=>'Ω','ᾪ'=>'Ω','ᾫ'=>'Ω','ᾬ'=>'Ω','ᾭ'=>'Ω','ᾮ'=>'Ω','ᾯ'=>'Ω',
    'ᾰ'=>'α','ᾱ'=>'α','ᾲ'=>'α','ᾳ'=>'α','ᾴ'=>'α','ᾶ'=>'α','ᾷ'=>'α',
    'Ᾰ'=>'Α','Ᾱ'=>'Α','Ὰ'=>'Α',"\u{1FBB}"=>'Α','ᾼ'=>'Α',
    // U+1FBE (prosgegrammeni) maps to the ordinary small iota.
    "\u{1FBE}"=>'ι',
    'ῂ'=>'η','ῃ'=>'η','ῄ'=>'η','ῆ'=>'η','ῇ'=>'η',
    'Ὲ'=>'Ε',"\u{1FC9}"=>'Ε','Ὴ'=>'Η',"\u{1FCB}"=>'Η','ῌ'=>'Η',
    'ῐ'=>'ι','ῑ'=>'ι','ῒ'=>'ι',"\u{1FD3}"=>'ι','ῖ'=>'ι','ῗ'=>'ι',
    'Ῐ'=>'Ι','Ῑ'=>'Ι','Ὶ'=>'Ι',"\u{1FDB}"=>'Ι',
    'ῠ'=>'υ','ῡ'=>'υ','ῢ'=>'υ',"\u{1FE3}"=>'υ','ῤ'=>'ρ','ῥ'=>'ρ','ῦ'=>'υ','ῧ'=>'υ',
    'Ῠ'=>'Υ','Ῡ'=>'Υ','Ὺ'=>'Υ',"\u{1FEB}"=>'Υ','Ῥ'=>'Ρ',
    'ῲ'=>'ω','ῳ'=>'ω','ῴ'=>'ω','ῶ'=>'ω','ῷ'=>'ω',
    'Ὸ'=>'Ο',"\u{1FF9}"=>'Ο','Ὼ'=>'Ω',"\u{1FFB}"=>'Ω','ῼ'=>'Ω',
  ];
  // phpcs:enable

  /**
   * Replaces every character found in MAP with its unaccented counterpart.
   *
   * MAP is resolved through static:: so that a subclass can supply its own
   * table by redeclaring the constant.
   *
   * @param string $string
   *   The text to process.
   *
   * @return string
   *   The text with each mapped letter replaced; all other bytes are returned
   *   as they were.
   */
  public function removeDiacritics($string) {
    return strtr($string, static::MAP);
  }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment