Skip to content

Instantly share code, notes, and snippets.

@ten9miq
Last active July 28, 2023 01:55
Show Gist options
  • Save ten9miq/ba6eb7be4d4edf5f08b70d073f680717 to your computer and use it in GitHub Desktop.
Save ten9miq/ba6eb7be4d4edf5f08b70d073f680717 to your computer and use it in GitHub Desktop.
/**
 * Builds a UTF-8 string containing every Unicode scalar value in an
 * inclusive code point range, in ascending order.
 *
 * Code points that are not Unicode scalar values are skipped: the surrogate
 * range U+D800..U+DFFF (which has no UTF-8 representation), negative values,
 * and anything above U+10FFFF.
 *
 * @param int $startCodePoint First code point of the range (inclusive).
 * @param int $endCodePoint   Last code point of the range (inclusive).
 * @return string The concatenated UTF-8 characters; '' when the range is
 *                empty or contains no scalar values.
 */
function generateUTF8CharactersInRange($startCodePoint, $endCodePoint) {
    $utf8Characters = '';
    for ($codePoint = $startCodePoint; $codePoint <= $endCodePoint; $codePoint++) {
        // Filter explicitly instead of relying on the converter's handling
        // of invalid input, so the output never contains placeholder text.
        if ($codePoint < 0 || $codePoint > 0x10FFFF) {
            continue;
        }
        if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
            continue;
        }
        // mb_chr() replaces the 'HTML-ENTITIES' pseudo-encoding, which is
        // deprecated as of PHP 8.2.
        $character = mb_chr($codePoint, 'UTF-8');
        if ($character !== false) {
            $utf8Characters .= $character;
        }
    }
    return $utf8Characters;
}
// Generate one string covering the whole Unicode code space.
$utf8String = generateUTF8CharactersInRange(
    0x0000,   // first code point, U+0000
    0x10FFFF  // last code point, U+10FFFF
);
/**
 * Returns the characters of a UTF-8 string that cannot be converted to SJIS.
 *
 * Each character is converted on its own. While the scan runs, mbstring's
 * substitute character is switched to "none", so an unmappable character
 * converts to an empty string. This avoids misreporting a literal "?"
 * (U+003F) as unconvertible and makes the result independent of whatever
 * substitute character the caller has configured. The previous setting is
 * restored before returning, even if the conversion throws.
 *
 * @param string $utf8String UTF-8 encoded text to examine.
 * @return string[] The unconvertible characters, in input order; duplicates
 *                  are kept. Empty array when everything converts.
 */
function findUnconvertibleChars($utf8String) {
    $unconvertibleChars = [];

    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('none');
    try {
        // mb_str_split() walks the string once; calling mb_substr() per index
        // rescans from the start each time and is quadratic on UTF-8 input.
        foreach (mb_str_split($utf8String, 1, 'UTF-8') as $char) {
            if (mb_convert_encoding($char, 'SJIS', 'UTF-8') === '') {
                $unconvertibleChars[] = $char;
            }
        }
    } finally {
        mb_substitute_character($previousSubstitute);
    }

    return $unconvertibleChars;
}
// Collect the characters of $utf8String reported by findUnconvertibleChars().
$unconvertibleChars = findUnconvertibleChars($utf8String);

// Print a heading, then each reported character on its own line.
print "変換できない文字の一覧:\n";
foreach ($unconvertibleChars as $char) {
    print "{$char}\n";
}
############################################################
// This file already declares generateUTF8CharactersInRange() earlier on.
// Declaring it a second time unconditionally is a fatal "cannot redeclare"
// error, so this copy is only defined when no definition exists yet (for
// example when this second snippet is run on its own).
if (!function_exists('generateUTF8CharactersInRange')) {
    /**
     * Builds a UTF-8 string containing every Unicode scalar value in an
     * inclusive code point range, in ascending order.
     *
     * Code points that are not Unicode scalar values are skipped: the
     * surrogate range U+D800..U+DFFF (which has no UTF-8 representation),
     * negative values, and anything above U+10FFFF.
     *
     * @param int $startCodePoint First code point of the range (inclusive).
     * @param int $endCodePoint   Last code point of the range (inclusive).
     * @return string The concatenated UTF-8 characters; '' when the range
     *                is empty or contains no scalar values.
     */
    function generateUTF8CharactersInRange($startCodePoint, $endCodePoint) {
        $utf8Characters = '';
        for ($codePoint = $startCodePoint; $codePoint <= $endCodePoint; $codePoint++) {
            // Filter explicitly instead of relying on the converter's
            // handling of invalid input.
            if ($codePoint < 0 || $codePoint > 0x10FFFF) {
                continue;
            }
            if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
                continue;
            }
            // mb_chr() replaces the 'HTML-ENTITIES' pseudo-encoding, which
            // is deprecated as of PHP 8.2.
            $character = mb_chr($codePoint, 'UTF-8');
            if ($character !== false) {
                $utf8Characters .= $character;
            }
        }
        return $utf8Characters;
    }
}
// Run the scan in chunks of up to 0x10000 code points instead of over the
// whole code space at once. Each pass advances to the code point that
// follows the chunk just processed.
for ($startCodePoint = 0x0000; $startCodePoint <= 0x10FFFF; $startCodePoint = $endCodePoint + 1) {
    // Clamp the chunk's upper bound to the last code point, U+10FFFF.
    $endCodePoint = min($startCodePoint + 0xFFFF, 0x10FFFF);

    // Generate this chunk's characters and collect the ones reported by
    // findUnconvertibleChars().
    $utf8String = generateUTF8CharactersInRange($startCodePoint, $endCodePoint);
    $unconvertibleChars = findUnconvertibleChars($utf8String);

    // Print a heading for this chunk, then each reported character on its
    // own line.
    print "変換できない文字の一覧:\n";
    foreach ($unconvertibleChars as $char) {
        print "{$char}\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment