Created
January 23, 2019 07:24
-
-
Save riverleo/e577a4d999d237fa27409ec72cdeed4d to your computer and use it in GitHub Desktop.
replaceDakuten.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import _ from 'lodash'; | |
export const DAKUTEN_CODE = 12441; // \u3099, combining voiced sound mark (dakuten)
export const HAN_DAKUTEN_CODE = 12442; // \u309a, combining semi-voiced sound mark (handakuten)

/*
 * Code-point reference for the hiragana and katakana handled below.
 *
 * ぁ: 12353, あ: 12354, ぃ: 12355, い: 12356, ぅ: 12357, う: 12358, ぇ: 12359
 * え: 12360, ぉ: 12361, お: 12362, か: 12363, が: 12364, き: 12365, ぎ: 12366
 * く: 12367, ぐ: 12368, け: 12369, げ: 12370, こ: 12371, ご: 12372, さ: 12373
 * ざ: 12374, し: 12375, じ: 12376, す: 12377, ず: 12378, せ: 12379, ぜ: 12380
 * そ: 12381, ぞ: 12382, た: 12383, だ: 12384, ち: 12385, ぢ: 12386, っ: 12387
 * つ: 12388, づ: 12389, て: 12390, で: 12391, と: 12392, ど: 12393, な: 12394
 * に: 12395, ぬ: 12396, ね: 12397, の: 12398, は: 12399, ば: 12400, ぱ: 12401
 * ひ: 12402, び: 12403, ぴ: 12404, ふ: 12405, ぶ: 12406, ぷ: 12407, へ: 12408
 * べ: 12409, ぺ: 12410, ほ: 12411, ぼ: 12412, ぽ: 12413, ま: 12414, み: 12415
 * む: 12416, め: 12417, も: 12418, ゃ: 12419, や: 12420, ゅ: 12421, ゆ: 12422
 * ょ: 12423, よ: 12424, ら: 12425, り: 12426, る: 12427, れ: 12428, ろ: 12429
 * ゎ: 12430, わ: 12431, ゐ: 12432, ゑ: 12433, を: 12434, ん: 12435, ゔ: 12436
 *
 * ァ: 12449, ア: 12450, ィ: 12451, イ: 12452, ゥ: 12453, ウ: 12454, ェ: 12455
 * エ: 12456, ォ: 12457, オ: 12458, カ: 12459, ガ: 12460, キ: 12461, ギ: 12462
 * ク: 12463, グ: 12464, ケ: 12465, ゲ: 12466, コ: 12467, ゴ: 12468, サ: 12469
 * ザ: 12470, シ: 12471, ジ: 12472, ス: 12473, ズ: 12474, セ: 12475, ゼ: 12476
 * ソ: 12477, ゾ: 12478, タ: 12479, ダ: 12480, チ: 12481, ヂ: 12482, ッ: 12483
 * ツ: 12484, ヅ: 12485, テ: 12486, デ: 12487, ト: 12488, ド: 12489, ナ: 12490
 * ニ: 12491, ヌ: 12492, ネ: 12493, ノ: 12494, ハ: 12495, バ: 12496, パ: 12497
 * ヒ: 12498, ビ: 12499, ピ: 12500, フ: 12501, ブ: 12502, プ: 12503, ヘ: 12504
 * ベ: 12505, ペ: 12506, ホ: 12507, ボ: 12508, ポ: 12509, マ: 12510, ミ: 12511
 * ム: 12512, メ: 12513, モ: 12514, ャ: 12515, ヤ: 12516, ュ: 12517, ユ: 12518
 * ョ: 12519, ヨ: 12520, ラ: 12521, リ: 12522, ル: 12523, レ: 12524, ロ: 12525
 * ヮ: 12526, ワ: 12527, ヰ: 12528, ヱ: 12529, ヲ: 12530, ン: 12531, ヴ: 12532
 */

// Kana whose voiced form is the very next code point (か -> が, ハ -> バ, ...).
const DAKUTEN_BASES = 'かきくけこさしすせそたちつてとはひふへほカキクケコサシスセソタチツテトハヒフヘホ';

// Kana whose semi-voiced form is two code points later (は -> ぱ, ハ -> パ, ...).
const HAN_DAKUTEN_BASES = 'はひふへほハヒフヘホ';

/**
 * Builds a lookup table from each base kana to the kana `offset` code points after it.
 *
 * @param {string} bases - Base kana, one UTF-16 code unit each.
 * @param {number} offset - Distance from a base kana to its composed form.
 * @returns {Map<string, string>} Base kana mapped to its composed kana.
 */
const buildComposedTable = (bases, offset) => new Map(
  [...bases].map((base) => [base, String.fromCharCode(base.charCodeAt(0) + offset)]),
);

// う/ウ are added explicitly: their voiced forms (ゔ/ヴ) are not at base + 1.
const DAKUTEN_TABLE = buildComposedTable(DAKUTEN_BASES, 1)
  .set('う', 'ゔ')
  .set('ウ', 'ヴ');
const HAN_DAKUTEN_TABLE = buildComposedTable(HAN_DAKUTEN_BASES, 2);

/**
 * Merges every "<base kana><combining mark>" pair in `text` into its precomposed kana.
 *
 * @param {string} text - Text to scan.
 * @param {number} markCode - UTF-16 code of the combining mark to look for.
 * @param {Map<string, string>} table - Base kana mapped to its composed kana for that mark.
 * @returns {string} Text with every composable pair merged; other marks are kept as-is.
 */
const composeMark = (text, markCode, table) => {
  const mark = String.fromCharCode(markCode);
  if (!text.includes(mark)) {
    return text;
  }

  const out = [];
  for (const ch of text) {
    // A mark at the very start has no preceding character: the lookup key is
    // `undefined`, nothing is found, and the mark is copied through unchanged.
    const composed = ch === mark ? table.get(out[out.length - 1]) : undefined;
    if (composed === undefined) {
      out.push(ch);
    } else {
      // Overwrite the base kana already emitted and drop the mark.
      out[out.length - 1] = composed;
    }
  }
  return out.join('');
};

/**
 * Converts decomposed Japanese kana (a base kana followed by a combining
 * dakuten \u3099 or handakuten \u309a) into the single precomposed kana,
 * e.g. "か" + "\u3099" -> "が" and "ハ" + "\u309a" -> "パ".
 *
 * Marks that follow a character with no precomposed form (or that start the
 * string) are left untouched.
 *
 * @param {string} raw - Text that may contain decomposed kana.
 * @returns {string} The text with composable pairs merged. Non-string input
 *   is returned unchanged.
 */
const replaceDakuten = (raw) => {
  if (typeof raw !== 'string') {
    return raw;
  }

  // Dakuten (voiced mark) handling.
  const voiced = composeMark(raw, DAKUTEN_CODE, DAKUTEN_TABLE);
  // Handakuten (semi-voiced mark) handling.
  return composeMark(voiced, HAN_DAKUTEN_CODE, HAN_DAKUTEN_TABLE);
};

export default replaceDakuten;
Author
riverleo
commented
Jan 23, 2019
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment