Skip to content

Instantly share code, notes, and snippets.

@kamiaka
Last active July 26, 2022 10:29
Show Gist options
  • Save kamiaka/1df9a636a449b41b62e78c22f5efda66 to your computer and use it in GitHub Desktop.
Save kamiaka/1df9a636a449b41b62e78c22f5efda66 to your computer and use it in GitHub Desktop.
Replace Japanese Kana from UTF-8-MAC to UTF-8 for any env.
<?php
/**
 * Replace Japanese Kana from UTF-8-MAC to UTF-8 for any env.
 *
 * Every base kana that is followed by a combining voiced sound mark (U+3099)
 * or a combining semi-voiced sound mark (U+309A) is replaced by the single
 * precomposed character. A combining mark that is still present afterwards
 * is replaced by its spacing form (U+309B / U+309C).
 *
 * Only str_replace() on raw bytes is used, so no extension is required.
 * The replacement table was originally machine-generated
 * (see normalizeUTF8MacKanaFuncGen()).
 *
 * @param string $str utf-8-mac string
 * @return string utf-8 string
 */
function normalizeUTF8MacKana($str) {
    // Ordered "decomposed bytes" => "precomposed bytes" table.
    // str_replace() applies the pairs one after another, so the two bare
    // combining marks MUST stay last: they only catch what no pair consumed.
    $map = array(
        // Hiragana + U+3099 (dakuten)
        "\xe3\x81\x8b\xe3\x82\x99" => "\xe3\x81\x8c", // ka -> ga
        "\xe3\x81\x8d\xe3\x82\x99" => "\xe3\x81\x8e", // ki -> gi
        "\xe3\x81\x8f\xe3\x82\x99" => "\xe3\x81\x90", // ku -> gu
        "\xe3\x81\x91\xe3\x82\x99" => "\xe3\x81\x92", // ke -> ge
        "\xe3\x81\x93\xe3\x82\x99" => "\xe3\x81\x94", // ko -> go
        "\xe3\x81\x95\xe3\x82\x99" => "\xe3\x81\x96", // sa -> za
        "\xe3\x81\x97\xe3\x82\x99" => "\xe3\x81\x98", // shi -> ji
        "\xe3\x81\x99\xe3\x82\x99" => "\xe3\x81\x9a", // su -> zu
        "\xe3\x81\x9b\xe3\x82\x99" => "\xe3\x81\x9c", // se -> ze
        "\xe3\x81\x9d\xe3\x82\x99" => "\xe3\x81\x9e", // so -> zo
        "\xe3\x81\x9f\xe3\x82\x99" => "\xe3\x81\xa0", // ta -> da
        "\xe3\x81\xa1\xe3\x82\x99" => "\xe3\x81\xa2", // chi -> di
        "\xe3\x81\xa4\xe3\x82\x99" => "\xe3\x81\xa5", // tsu -> du
        "\xe3\x81\xa6\xe3\x82\x99" => "\xe3\x81\xa7", // te -> de
        "\xe3\x81\xa8\xe3\x82\x99" => "\xe3\x81\xa9", // to -> do
        "\xe3\x81\xaf\xe3\x82\x99" => "\xe3\x81\xb0", // ha -> ba
        "\xe3\x81\xb2\xe3\x82\x99" => "\xe3\x81\xb3", // hi -> bi
        "\xe3\x81\xb5\xe3\x82\x99" => "\xe3\x81\xb6", // fu -> bu
        "\xe3\x81\xb8\xe3\x82\x99" => "\xe3\x81\xb9", // he -> be
        "\xe3\x81\xbb\xe3\x82\x99" => "\xe3\x81\xbc", // ho -> bo
        "\xe3\x81\x86\xe3\x82\x99" => "\xe3\x82\x94", // u -> vu
        "\xe3\x82\x9d\xe3\x82\x99" => "\xe3\x82\x9e", // hiragana iteration mark

        // Katakana + U+3099 (dakuten)
        "\xe3\x82\xab\xe3\x82\x99" => "\xe3\x82\xac", // KA -> GA
        "\xe3\x82\xad\xe3\x82\x99" => "\xe3\x82\xae", // KI -> GI
        "\xe3\x82\xaf\xe3\x82\x99" => "\xe3\x82\xb0", // KU -> GU
        "\xe3\x82\xb1\xe3\x82\x99" => "\xe3\x82\xb2", // KE -> GE
        "\xe3\x82\xb3\xe3\x82\x99" => "\xe3\x82\xb4", // KO -> GO
        "\xe3\x82\xb5\xe3\x82\x99" => "\xe3\x82\xb6", // SA -> ZA
        "\xe3\x82\xb7\xe3\x82\x99" => "\xe3\x82\xb8", // SHI -> JI
        "\xe3\x82\xb9\xe3\x82\x99" => "\xe3\x82\xba", // SU -> ZU
        "\xe3\x82\xbb\xe3\x82\x99" => "\xe3\x82\xbc", // SE -> ZE
        "\xe3\x82\xbd\xe3\x82\x99" => "\xe3\x82\xbe", // SO -> ZO
        "\xe3\x82\xbf\xe3\x82\x99" => "\xe3\x83\x80", // TA -> DA
        "\xe3\x83\x81\xe3\x82\x99" => "\xe3\x83\x82", // CHI -> DI
        "\xe3\x83\x84\xe3\x82\x99" => "\xe3\x83\x85", // TSU -> DU
        "\xe3\x83\x86\xe3\x82\x99" => "\xe3\x83\x87", // TE -> DE
        "\xe3\x83\x88\xe3\x82\x99" => "\xe3\x83\x89", // TO -> DO
        "\xe3\x83\x8f\xe3\x82\x99" => "\xe3\x83\x90", // HA -> BA
        "\xe3\x83\x92\xe3\x82\x99" => "\xe3\x83\x93", // HI -> BI
        "\xe3\x83\x95\xe3\x82\x99" => "\xe3\x83\x96", // FU -> BU
        "\xe3\x83\x98\xe3\x82\x99" => "\xe3\x83\x99", // HE -> BE
        "\xe3\x83\x9b\xe3\x82\x99" => "\xe3\x83\x9c", // HO -> BO
        "\xe3\x82\xa6\xe3\x82\x99" => "\xe3\x83\xb4", // U -> VU
        "\xe3\x83\xaf\xe3\x82\x99" => "\xe3\x83\xb7", // WA -> VA
        "\xe3\x83\xb0\xe3\x82\x99" => "\xe3\x83\xb8", // WI -> VI
        "\xe3\x83\xb1\xe3\x82\x99" => "\xe3\x83\xb9", // WE -> VE
        "\xe3\x83\xb2\xe3\x82\x99" => "\xe3\x83\xba", // WO -> VO
        // U+30FD + U+3099 -> U+30FE: the katakana counterpart of the
        // hiragana iteration mark handled above.
        "\xe3\x83\xbd\xe3\x82\x99" => "\xe3\x83\xbe", // katakana iteration mark

        // Hiragana / katakana + U+309A (handakuten)
        "\xe3\x81\xaf\xe3\x82\x9a" => "\xe3\x81\xb1", // ha -> pa
        "\xe3\x81\xb2\xe3\x82\x9a" => "\xe3\x81\xb4", // hi -> pi
        "\xe3\x81\xb5\xe3\x82\x9a" => "\xe3\x81\xb7", // fu -> pu
        "\xe3\x81\xb8\xe3\x82\x9a" => "\xe3\x81\xba", // he -> pe
        "\xe3\x81\xbb\xe3\x82\x9a" => "\xe3\x81\xbd", // ho -> po
        "\xe3\x83\x8f\xe3\x82\x9a" => "\xe3\x83\x91", // HA -> PA
        "\xe3\x83\x92\xe3\x82\x9a" => "\xe3\x83\x94", // HI -> PI
        "\xe3\x83\x95\xe3\x82\x9a" => "\xe3\x83\x97", // FU -> PU
        "\xe3\x83\x98\xe3\x82\x9a" => "\xe3\x83\x9a", // HE -> PE
        "\xe3\x83\x9b\xe3\x82\x9a" => "\xe3\x83\x9d", // HO -> PO

        // Leftover combining marks -> spacing marks (keep these last)
        "\xe3\x82\x99" => "\xe3\x82\x9b",
        "\xe3\x82\x9a" => "\xe3\x82\x9c",
    );

    return str_replace(array_keys($map), array_values($map), $str);
}
<?php
/**
 * Generate the normalizeUTF8MacKana function.
 *
 * Builds the source code of a function that replaces every "base kana +
 * combining mark" byte sequence with the matching precomposed kana, and any
 * combining mark left over with its spacing form. All characters are emitted
 * as "\xNN" byte escapes so the generated code is pure ASCII.
 *
 * This file must be saved as UTF-8: the kana tables below are split with the
 * PCRE "u" modifier instead of relying on mbstring settings or on encoding
 * auto-detection.
 *
 * @return string source code of the function (without an opening PHP tag)
 * @throws LogicException if a kana table is not valid UTF-8 or its two rows
 *                        do not have the same number of characters
 */
function normalizeUTF8MacKanaFuncGen() {
    // Each table pairs the N-th character of 'base' with the N-th character
    // of 'composed'; 'mark' is the combining mark appended to every base.
    $tables = [
        [
            // The katakana iteration mark pair sits at the end of the rows.
            'base'     => 'かきくけこさしすせそたちつてとはひふへほうゝカキクケコサシスセソタチツテトハヒフヘホウワヰヱヲヽ',
            'composed' => 'がぎぐげござじずぜぞだぢづでどばびぶべぼゔゞガギグゲゴザジズゼゾダヂヅデドバビブベボヴヷヸヹヺヾ',
            'mark'     => "\xe3\x82\x99", // U+3099 combining voiced sound mark
        ],
        [
            'base'     => 'はひふへほハヒフヘホ',
            'composed' => 'ぱぴぷぺぽパピプペポ',
            'mark'     => "\xe3\x82\x9a", // U+309A combining semi-voiced sound mark
        ],
    ];

    // Split a UTF-8 string into characters; yields false on invalid UTF-8.
    $splitChars = function ($text) {
        return preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
    };

    // Render raw bytes as literal, zero-padded "\xNN" escapes.
    $escapeBytes = function ($bytes) {
        return '\x' . implode('\x', str_split(bin2hex($bytes), 2));
    };

    $search = [];
    $replace = [];
    foreach ($tables as $table) {
        $bases = $splitChars($table['base']);
        $composed = $splitChars($table['composed']);
        if ($bases === false || $composed === false || count($bases) !== count($composed)) {
            throw new LogicException('Kana tables must be valid UTF-8 rows of equal length.');
        }

        foreach ($bases as $index => $base) {
            $search[] = $escapeBytes($base . $table['mark']);
            $replace[] = $escapeBytes($composed[$index]);
        }
    }

    // The two trailing entries turn leftover combining marks into spacing
    // marks; they must come after every pair in the generated arrays.
    return implode("\n", [
        'function normalizeUTF8MacKana($str) {',
        ' return str_replace(',
        ' array("' . implode('", "', $search) . '", "\xe3\x82\x99", "\xe3\x82\x9a"),',
        ' array("' . implode('", "', $replace) . '", "\xe3\x82\x9b", "\xe3\x82\x9c"),',
        ' $str',
        ' );',
        '}'
    ]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment