Created
May 23, 2011 02:57
-
-
Save pacochi/986137 to your computer and use it in GitHub Desktop.
間違った入力モードで入力された文を読みやすくするクラス
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
# 宇宙語・みかか語変換
# ぺたちゃ用に作りました
# UTF-8 向けです
/*
mb_internal_encoding('UTF-8');
require_once('./JP106Key.php');
echo JP106Key::AtoK('vot@u') . " / ";
echo JP106Key::KtoA('ちりせくちこいか') . " / ";
echo JP106Key::KtoR('すらほもちまに') . " / ";
$str = 'd@s@4fy^@zします';
$str = JP106Key::isKana($str) ? JP106Key::KtoR($str) : JP106Key::AtoK($str);
echo $str;
*/
# Repairs text that was typed on a JP106 keyboard in the wrong input mode.
#
# Three conversions are offered, all as static methods:
#   AtoK  - keys typed in alphanumeric mode  -> the kana those keys produce
#   KtoA  - kana typed in kana-input mode    -> the characters on those keys
#   AtoR  - romaji text                      -> hiragana
#   KtoR  - kana typed in kana-input mode    -> hiragana (KtoA followed by AtoR)
# isKana() is a rough detector that callers can use to pick a direction.
#
# All strings are treated as UTF-8.
class JP106Key {
    const SyllableReg = '/([^aiueo]*)([aiueo]|$)/'; # consonant run + vowel (or end of input)
    const GemConReg = '/([kqgszjtcdhfbvpmrwylx])(\\1)/'; # doubled consonant (geminate, "sokuon")
    const SylNasReg = '/(nn?|m)/'; # leftover n / nn / m (syllabic nasal)
    const LonVowReg = '/(\\-|-|h)/'; # leftover hyphen or h (long vowel mark)

    # Unshifted keys: kana => character on the same key.
    private static $baseMap = array(
        'あ' => '3', 'い' => 'e', 'う' => '4', 'え' => '5', 'お' => '6',
        'か' => 't', 'き' => 'g', 'く' => 'h', 'け' => ':', 'こ' => 'b',
        'さ' => 'x', 'し' => 'd', 'す' => 'r', 'せ' => 'p', 'そ' => 'c',
        'た' => 'q', 'ち' => 'a', 'つ' => 'z', 'て' => 'w', 'と' => 's',
        'な' => 'u', 'に' => 'i', 'ぬ' => '1', 'ね' => ',', 'の' => 'k',
        'は' => 'f', 'ひ' => 'v', 'ふ' => '2', 'へ' => '^', 'ほ' => '-',
        'ま' => 'j', 'み' => 'n', 'む' => ']', 'め' => '/', 'も' => 'm',
        'や' => '7', 'ゆ' => '8', 'よ' => '9', 'わ' => '0', 'ん' => 'y',
        'ら' => 'o', 'り' => 'l', 'る' => '.', 'れ' => ';', 'ろ' => '\\', # shows up as a yen sign, though
        'ー' => '¥', '゛' => '@', '゜' => '[',
    );

    # Shifted keys: kana/punctuation => character on the same key with Shift.
    private static $shiftMap = array(
        'ぁ' => '#', 'ぃ' => 'E', 'ぅ' => '$', 'ぇ' => '%', 'ぉ' => '&',
        'ヵ' => 'T', 'き' => 'G', 'く' => 'H', 'ヶ' => '*', 'こ' => 'B',
        'さ' => 'X', 'し' => 'D', 'す' => 'R', 'せ' => 'P', 'そ' => 'C',
        'た' => 'Q', 'ち' => 'A', 'っ' => 'Z', 'て' => 'W', 'と' => 'S',
        'な' => 'U', 'に' => 'I', 'ぬ' => '!', '、' => '<', 'の' => 'K',
        'ゎ' => 'F', 'ゐ' => 'V', 'ふ' => '”', 'ゑ' => '~', 'ほ' => '=',
        'ま' => 'J', 'み' => 'N', '」' => '}', '・' => '?', 'も' => 'M',
        'ゃ' => '’', 'ゅ' => '(', 'ょ' => ')', 'ん' => 'Y',
        'ら' => 'O', 'り' => 'L', '。' => '>', 'れ' => '+', 'ろ' => '_',
        'ー' => '|', '゛' => '‘', '「' => '{',
    );

    private static $flippedMap; # character => kana; built lazily by AtoK()
    private static $mergedMap;  # kana => character; built lazily by KtoA()

    # Romaji table: vowel => (consonant cluster => kana). Within each vowel the
    # longer clusters are listed first because A2R() takes the first entry that
    # matches the end of the consonant run. The '' key is the bare vowel.
    private static $romajiMap = array (
        'a' => array (
            'tch' => 'っちゃ', 'ts' => 'つぁ', 'lk' => 'ヵ', 'xk' => 'ヵ', 'lw' => 'ゎ',
            'sw' => 'すぁ', 'tw' => 'とぁ', 'dw' => 'どぁ', 'fw' => 'ふぁ', 'sh' => 'しゃ',
            'zh' => 'じゃ', 'ch' => 'ちゃ', 'th' => 'てゃ', 'dh' => 'でゃ', 'wh' => 'うぁ',
            'ky' => 'きゃ', 'gy' => 'ぎゃ', 'qy' => 'くゃ', 'sy' => 'しゃ', 'zy' => 'じゃ',
            'jy' => 'じゃ', 'ty' => 'ちゃ', 'cy' => 'ちゃ', 'dy' => 'ぢゃ', 'ny' => 'にゃ',
            'hy' => 'ひゃ', 'by' => 'びゃ', 'py' => 'ぴゃ', 'fy' => 'ふゃ', 'my' => 'みゃ',
            'ry' => 'りゃ', 'vy' => 'ヴゃ', 'ly' => 'ゃ', 'xy' => 'ゃ',
            'k' => 'か', 'g' => 'が', 'q' => 'くぁ', 's' => 'さ', 'c' => 'か', 'z' => 'ざ',
            'j' => 'じゃ', 't' => 'た', 'd' => 'だ', 'n' => 'な', 'h' => 'は', 'b' => 'ば',
            'p' => 'ぱ', 'f' => 'ふぁ', 'm' => 'ま', 'y' => 'や', 'r' => 'ら', 'w' => 'わ',
            'v' => 'ヴぁ', 'l' => 'ぁ', 'x' => 'ぁ', '' => 'あ'
        ),
        'i' => array (
            'tch' => 'っち', 'ts' => 'つぃ', 'dz' => 'ぢ',
            'sw' => 'すぃ', 'tw' => 'とぃ', 'dw' => 'どぃ', 'fw' => 'ふぃ', 'sh' => 'し',
            'zh' => 'じぃ', 'ch' => 'ち', 'th' => 'てぃ', 'dh' => 'でぃ', 'wh' => 'ゐ',
            'ky' => 'きぃ', 'qy' => 'くぃ', 'gy' => 'ぎぃ', 'sy' => 'しぃ', 'jy' => 'じぃ',
            'zy' => 'じぃ', 'cy' => 'ちぃ', 'ty' => 'ちぃ', 'dy' => 'ぢぃ', 'ny' => 'にぃ',
            'hy' => 'ひぃ', 'by' => 'びぃ', 'py' => 'ぴぃ', 'fy' => 'ふぃ', 'my' => 'みぃ',
            'ry' => 'りぃ', 'vy' => 'ヴぃ', 'wy' => 'ゐ', 'ly' => 'ぃ', 'xy' => 'ぃ',
            'k' => 'き', 'q' => 'くぃ', 'g' => 'ぎ', 's' => 'し', 'c' => 'し', 'z' => 'じ',
            'j' => 'じ', 't' => 'ち', 'd' => 'ぢ', 'n' => 'に', 'h' => 'ひ', 'b' => 'び',
            'p' => 'ぴ', 'f' => 'ふぃ', 'm' => 'み', 'y' => 'い', 'r' => 'り', 'w' => 'うぃ',
            'v' => 'ヴぃ', 'l' => 'ぃ', 'x' => 'ぃ', '' => 'い'
        ),
        'u' => array (
            'tch' => 'っちゅ', 'lts' => 'っ', 'xts' => 'っ', 'ltu' => 'っ', 'ts' => 'つ',
            'sw' => 'すぅ', 'tw' => 'とぅ', 'dw' => 'どぅ', 'fw' => 'ふぅ', 'sh' => 'しゅ',
            'zh' => 'じゅ', 'ch' => 'ちゅ', 'th' => 'てゅ', 'dh' => 'でゅ', 'wh' => 'うぅ',
            'ky' => 'きゅ', 'qy' => 'くゅ', 'gy' => 'ぎゅ', 'sy' => 'しゅ', 'zy' => 'じゅ',
            'jy' => 'じゅ', 'cy' => 'ちゅ', 'ty' => 'ちゅ', 'dy' => 'ぢゅ', 'ny' => 'にゅ',
            'hy' => 'ひゅ', 'by' => 'びゅ', 'py' => 'ぴゅ', 'fy' => 'ふゅ', 'my' => 'みゅ',
            'ry' => 'りゅ', 'vy' => 'ヴゅ', 'ly' => 'ゅ', 'xy' => 'ゅ',
            'k' => 'く', 'q' => 'く', 'g' => 'ぐ', 's' => 'す', 'c' => 'く', 'z' => 'ず',
            'j' => 'じゅ', 't' => 'つ', 'd' => 'づ', 'n' => 'ぬ', 'h' => 'ふ', 'b' => 'ぶ',
            'p' => 'ぷ', 'f' => 'ふ', 'm' => 'む', 'y' => 'ゆ', 'r' => 'る', 'w' => 'う',
            'v' => 'ヴ', 'l' => 'ぅ', 'x' => 'ぅ', '' => 'う'
        ),
        'e' => array (
            'tch' => 'っちぇ', 'ts' => 'つぇ', 'lk' => 'ヶ', 'xk' => 'ヶ',
            'sw' => 'すぇ', 'tw' => 'とぇ', 'dw' => 'どぇ', 'fw' => 'ふぇ', 'sh' => 'しぇ',
            'zh' => 'じぇ', 'ch' => 'ちぇ', 'th' => 'てぇ', 'dh' => 'でぇ', 'wh' => 'ゑ',
            'ky' => 'きぇ', 'qy' => 'くぇ', 'gy' => 'ぎぇ', 'sy' => 'しぇ', 'zy' => 'じぇ',
            'jy' => 'じぇ', 'cy' => 'ちぇ', 'ty' => 'ちぇ', 'dy' => 'ぢぇ', 'ny' => 'にぇ',
            'hy' => 'ひぇ', 'by' => 'びぇ', 'py' => 'ぴぇ', 'fy' => 'ふぇ', 'my' => 'みぇ',
            'ry' => 'りぇ', 'vy' => 'ヴぇ', 'wy' => 'ゑ', 'ly' => 'ぇ', 'xy' => 'ぇ',
            'k' => 'け', 'q' => 'くぇ', 'g' => 'げ', 's' => 'せ', 'c' => 'せ', 'z' => 'ぜ',
            'j' => 'じぇ', 't' => 'て', 'd' => 'で', 'n' => 'ね', 'h' => 'へ', 'b' => 'べ',
            'p' => 'ぺ', 'f' => 'ふぇ', 'm' => 'め', 'y' => 'いぇ', 'r' => 'れ', 'w' => 'うぇ',
            'v' => 'ヴぇ', 'l' => 'ぇ', 'x' => 'ぇ', '' => 'え'
        ),
        'o' => array (
            'tch' => 'っちょ', 'ts' => 'つぉ',
            'sw' => 'すぉ', 'tw' => 'とぉ', 'dw' => 'どぉ', 'fw' => 'ふぉ', 'sh' => 'しょ',
            'zh' => 'じょ', 'ch' => 'ちょ', 'th' => 'てょ', 'dh' => 'でょ', 'wh' => 'うぉ',
            'ky' => 'きょ', 'qy' => 'くょ', 'gy' => 'ぎょ', 'sy' => 'しょ', 'zy' => 'じょ',
            'jy' => 'じょ', 'cy' => 'ちょ', 'ty' => 'ちょ', 'dy' => 'ぢょ', 'ny' => 'にょ',
            'hy' => 'ひょ', 'by' => 'びょ', 'py' => 'ぴょ', 'fy' => 'ふょ', 'my' => 'みょ',
            'ry' => 'りょ', 'vy' => 'ヴょ', 'ly' => 'ょ', 'xy' => 'ょ',
            'k' => 'こ', 'q' => 'くぉ', 'g' => 'ご', 's' => 'そ', 'c' => 'こ', 'z' => 'ぞ',
            'j' => 'じょ', 't' => 'と', 'd' => 'ど', 'n' => 'の', 'h' => 'ほ', 'b' => 'ぼ',
            'p' => 'ぽ', 'f' => 'ふぉ', 'm' => 'も', 'y' => 'よ', 'r' => 'ろ', 'w' => 'を',
            'v' => 'ヴぉ', 'l' => 'ぉ', 'x' => 'ぉ', '' => 'お'
        )
    );

    # Roughly decides whether $str is written in kana: returns true when more
    # than half of its characters fall in the range ぁ-ヶ.
    public static function isKana($str) {
        $kanaNum = preg_match_all("/[ぁ-ヶ]/u", $str, $m);
        return($kanaNum * 2 > mb_strlen($str, 'UTF-8'));
    }

    # vot@u -> hiragana.
    # Converts each typed character to the kana found on the same key.
    # Characters with no table entry are returned in their widened form.
    public static function AtoK($str) {
        # Build the reverse table on first use.
        if (!isset(self::$flippedMap)) self::$flippedMap = self::buildFlippedMap();
        # Widen alphanumerics, symbols and spaces.
        $str = mb_convert_kana($str, 'AS', 'UTF-8');
        # The 'A' option leaves these four characters alone; map them by hand.
        $str = str_replace(array('"', '\'', '\\', '~'), array('”', '’', '¥', '~'), $str);
        $len = mb_strlen($str, 'UTF-8');
        $result = '';
        for ($i = 0; $i < $len; $i++) $result .= self::A2K(mb_substr($str, $i, 1, 'UTF-8'));
        return($result);
    }

    # ちりせくちこいか -> alphabet.
    # Converts each kana to the character printed on the same key.
    # Characters with no table entry are passed through unchanged.
    public static function KtoA($str) {
        # Build the forward table on first use. The extra entries belong in
        # $mergedMap (the table K2A() reads); writing them anywhere else would
        # leave them unused and would make AtoK() believe its own table exists.
        if (!isset(self::$mergedMap)) {
            $map = self::$baseMap + self::$shiftMap;
            # Shift + 0 produces no character in alphanumeric mode.
            $map['を'] = '';
            # Combining voiced / semi-voiced sound marks, just in case.
            $map["\xE3\x82\x99"] = '@';
            $map["\xE3\x82\x9A"] = '[';
            # Hiragana small ka / small ke, just in case.
            $map["\xE3\x82\x95"] = 'T';
            $map["\xE3\x82\x96"] = '*';
            self::$mergedMap = $map;
        }
        # Fold half-width and full-width katakana into hiragana before lookup.
        $str = mb_convert_kana($str, 'Hc', 'UTF-8');
        $len = mb_strlen($str, 'UTF-8');
        $result = '';
        for ($i = 0; $i < $len; $i++) $result .= self::K2A(mb_substr($str, $i, 1, 'UTF-8'));
        return($result);
    }

    # ro-maji -> ろーまじ.
    # Lower-cases the input, narrows full-width letters, then converts it
    # syllable by syllable. Any kana in the input is first passed through KtoA().
    public static function AtoR($str) {
        $str = self::KtoA($str);
        $str = mb_convert_case($str, MB_CASE_LOWER, 'UTF-8');
        $str = mb_convert_kana($str, 'r', 'UTF-8');
        # Give up if nothing syllable-like is found.
        if (!preg_match_all(self::SyllableReg, $str, $match, PREG_SET_ORDER))
            return($str);
        $result = '';
        foreach ($match as $m) $result .= self::A2R($m[1], $m[2]);
        return($result);
    }

    # すらほもちまに -> ろーまじ.
    # For romaji that was typed while the keyboard was in kana-input mode.
    public static function KtoR($str) {
        return(self::AtoR(self::KtoA($str)));
    }

    # Builds the character => kana table used by AtoK().
    private static function buildFlippedMap() {
        $map = array_flip(self::$baseMap) + array_flip(self::$shiftMap);
        # Use the combining forms of the voiced / semi-voiced sound marks.
        $map['@'] = "\xE3\x82\x99";
        $map['`'] = "\xE3\x82\x99";
        $map['‘'] = "\xE3\x82\x99";
        $map['['] = "\xE3\x82\x9A";
        # Use the hiragana forms of small ka / small ke.
        $map['T'] = "\xE3\x82\x95";
        $map['*'] = "\xE3\x82\x96";
        # AtoK() widens its input before the lookup, so every key must also be
        # reachable under its full-width spelling. Keys that the 'A' option
        # does not change come back identical and are skipped by isset().
        foreach (array_keys($map) as $key) {
            $wide = mb_convert_kana((string) $key, 'A', 'UTF-8');
            if (!isset($map[$wide])) $map[$wide] = $map[$key];
        }
        return($map);
    }

    # a -> ち (single character; unknown characters are returned unchanged)
    private static function A2K($char) {
        return(isset(self::$flippedMap[$char]) ? self::$flippedMap[$char] : $char);
    }

    # ち -> a (single character; unknown characters are returned unchanged)
    private static function K2A($char) {
        return(isset(self::$mergedMap[$char]) ? self::$mergedMap[$char] : $char);
    }

    # ka -> か.
    # Converts one syllable, given as its consonant run and its vowel. $vowel is
    # '' for the trailing piece of input that does not end in a vowel.
    private static function A2R($consonant, $vowel) {
        # Trailing consonants only: just apply the three clean-up passes.
        if ($vowel === '') return(self::H2LV(self::N2SN(self::TT2GR($consonant))));
        $table = self::$romajiMap[$vowel];
        # Bare vowel.
        if ($consonant === '') return($table['']);
        # Geminate consonants first, so "tt" becomes "っt" before the lookup.
        $consonant = self::TT2GR($consonant);
        $result = '';
        foreach ($table as $c => $rep) {
            if ($c === '') continue;
            # Match the cluster at the end of the run; anything in front of it
            # stays in place for the clean-up passes below.
            $reg = '/' . $c . '$/';
            if (preg_match($reg, $consonant)) {
                $result = preg_replace($reg, $rep, $consonant, 1);
                break;
            }
        }
        # Nothing matched: keep the consonants and append the bare vowel.
        if ($result === '') $result = $consonant . $table[''];
        # Syllabic nasal, then long vowel mark.
        $result = self::H2LV(self::N2SN($result));
        return($result);
    }

    # Syllabic nasal: replaces every remaining n / nn / m with ん.
    private static function N2SN($str) {
        return(preg_replace(self::SylNasReg, 'ん', $str));
    }

    # Long vowel: replaces every remaining hyphen or h with ー.
    private static function H2LV($str) {
        return(preg_replace(self::LonVowReg, 'ー', $str));
    }

    # Geminate consonant: replaces the first letter of a doubled consonant with っ.
    private static function TT2GR($str) {
        return(preg_replace(self::GemConReg, 'っ$1', $str));
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment