Skip to content

Instantly share code, notes, and snippets.

@codemasher
Last active January 30, 2023 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codemasher/d07d3e6e9346c08e7a41b8b978784952 to your computer and use it in GitHub Desktop.
Save codemasher/d07d3e6e9346c08e7a41b8b978784952 to your computer and use it in GitHub Desktop.
Create a table of Shift-JIS Kanji in PHP
<?php
/**
* sjis-table.php
*
* QR Code ISO/IEC 18004:2000 - 8.4.5 Kanji Mode
*
* @see https://en.wikipedia.org/wiki/Shift_JIS#With_vendor_or_JIS_X_0213_extensions
* @see http://www.rikai.com/library/kanjitables/kanji_codes.sjis.shtml
*
* @created 29.01.2023
* @author smiley <smiley@chillerlan.net>
* @copyright 2023 smiley
* @license MIT
*/
/*
 * Build the table of double-byte Shift JIS sequences, indexed as
 * $list[lead byte][trail byte] => raw 2-byte string (or null for padding/unused cells).
 *
 * Every Shift JIS lead byte covers a PAIR of JIS X 0208 rows:
 *
 *   - trail bytes 0x40-0x7e and 0x80-0x9e belong to the odd row of the pair
 *   - trail bytes 0x9f-0xfc belong to the even row of the pair
 *
 * The odd/even distinction refers to the first byte of the JIS sequence (the row),
 * not to the Shift JIS lead byte itself, so both trail byte ranges are valid
 * for the same lead byte and both have to be generated.
 */
$list = [];

for($byte1 = 0x81; $byte1 < 0xeb; $byte1++){

	// skip the lead bytes that are not part of the table: 0x85-0x87 and 0xa0-0xdf
	if(($byte1 > 0x84 && $byte1 < 0x88) || ($byte1 > 0x9f && $byte1 < 0xe0)){
		continue;
	}

	// 0x88 is the only included lead byte whose odd row (JIS X 0208 row 15) is unassigned,
	// the kanji start with the even row at 0x889f
	$hasOddRow = $byte1 !== 0x88;

	// pre-fill with null so that each sub-table is a multiple of 16 cells long
	// and can be chunked into aligned rows of 16 by the output code
	$list[$byte1] = $hasOddRow
		? array_fill(0x40, 192, null)  // keys 0x40 - 0xff (12 rows of 16)
		: array_fill(0x90, 112, null); // keys 0x90 - 0xff (7 rows of 16)

	// trail bytes of the odd row: 0x40 - 0x9e
	if($hasOddRow){
		for($byte2 = 0x40; $byte2 < 0x9f; $byte2++){

			// 0x7f is never used as a trail byte, the cell remains null
			if($byte2 === 0x7f){
				continue;
			}

			$list[$byte1][$byte2] = chr($byte1).chr($byte2);
		}
	}

	// trail bytes of the even row: 0x9f - 0xfc (0xfd - 0xff remain null as padding)
	for($byte2 = 0x9f; $byte2 < 0xfd; $byte2++){
		$list[$byte1][$byte2] = chr($byte1).chr($byte2);
	}
}
// output: an HTML document with one <table> per lead byte when served through a web SAPI,
// plain text rows of 16 space separated characters on the command line
$isCli = php_sapi_name() === 'cli';

// decodes a raw Shift JIS byte pair to UTF-8, padding cells (null) are passed through
$toUtf8 = fn($chr) => ($chr !== null ? mb_convert_encoding($chr, 'UTF-8', 'SJIS-2004') : null);

if(!$isCli){
	echo '<!doctype html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<title>Shift JIS Kanji Code Table</title>
</head>
<body>
';
}

foreach($list as $chars){
	$lines = [];

	// 16 cells per row, the trail byte keys are preserved by array_chunk()
	foreach(array_chunk(array_map($toUtf8, $chars), 16, true) as $row){
		$lines[] = $isCli
			? implode(' ', $row)
			: sprintf('<td>%s</td>', implode('</td><td>', $row));
	}

	if($isCli){
		printf("%s\n", implode("\n", $lines));
	}
	else{
		printf("<table><tr>%s</tr></table>\n", implode('</tr><tr>', $lines));
	}
}

if(!$isCli){
	echo '</body>';
}

exit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment