Skip to content

Instantly share code, notes, and snippets.

@riverleo
Created January 23, 2019 07:24
Show Gist options
  • Save riverleo/e577a4d999d237fa27409ec72cdeed4d to your computer and use it in GitHub Desktop.
Save riverleo/e577a4d999d237fa27409ec72cdeed4d to your computer and use it in GitHub Desktop.
replaceDakuten.js
import _ from 'lodash';
/** UTF-16 code unit of the combining voiced sound mark (dakuten). */
export const DAKUTEN_CODE = 12441; // \u3099
/** UTF-16 code unit of the combining semi-voiced sound mark (handakuten). */
export const HAN_DAKUTEN_CODE = 12442; // \u309a

// Builds a Set holding the UTF-16 code of every character in `kana`.
const toCodeSet = kana => new Set(Array.from(kana, c => c.charCodeAt(0)));

// Kana that merge with a following dakuten; the merged form is always code + 1.
const DAKUTEN_BASE_CODES = toCodeSet(
  'かきくけこさしすせそたちつてとはひふへほカキクケコサシスセソタチツテトハヒフヘホ',
);

// Kana that merge with a following handakuten; the merged form is always code + 2.
const HAN_DAKUTEN_BASE_CODES = toCodeSet('はひふへほハヒフヘホ');

/**
 * Replaces every "kana + combining dakuten/handakuten" pair in a Japanese
 * string with the corresponding single precomposed kana character.
 *
 * A mark is merged only when the character right before it is one of the
 * base kana listed in DAKUTEN_BASE_CODES / HAN_DAKUTEN_BASE_CODES. Any other
 * mark (including one at the very start of the string) is kept as it is.
 *
 * NOTE(review): う/ウ followed by a dakuten is not merged into ゔ/ヴ, which
 * matches the original code table - confirm that this is intended.
 *
 * Character code reference:
 *
 * ぁ: 12353, あ: 12354, ぃ: 12355, い: 12356, ぅ: 12357, う: 12358, ぇ: 12359
 * え: 12360, ぉ: 12361, お: 12362, か: 12363, が: 12364, き: 12365, ぎ: 12366
 * く: 12367, ぐ: 12368, け: 12369, げ: 12370, こ: 12371, ご: 12372, さ: 12373
 * ざ: 12374, し: 12375, じ: 12376, す: 12377, ず: 12378, せ: 12379, ぜ: 12380
 * そ: 12381, ぞ: 12382, た: 12383, だ: 12384, ち: 12385, ぢ: 12386, っ: 12387
 * つ: 12388, づ: 12389, て: 12390, で: 12391, と: 12392, ど: 12393, な: 12394
 * に: 12395, ぬ: 12396, ね: 12397, の: 12398, は: 12399, ば: 12400, ぱ: 12401
 * ひ: 12402, び: 12403, ぴ: 12404, ふ: 12405, ぶ: 12406, ぷ: 12407, へ: 12408
 * べ: 12409, ぺ: 12410, ほ: 12411, ぼ: 12412, ぽ: 12413, ま: 12414, み: 12415
 * む: 12416, め: 12417, も: 12418, ゃ: 12419, や: 12420, ゅ: 12421, ゆ: 12422
 * ょ: 12423, よ: 12424, ら: 12425, り: 12426, る: 12427, れ: 12428, ろ: 12429
 * ゎ: 12430, わ: 12431, ゐ: 12432, ゑ: 12433, を: 12434, ん: 12435, ゔ: 12436
 *
 * ァ: 12449, ア: 12450, ィ: 12451, イ: 12452, ゥ: 12453, ウ: 12454, ェ: 12455
 * エ: 12456, ォ: 12457, オ: 12458, カ: 12459, ガ: 12460, キ: 12461, ギ: 12462
 * ク: 12463, グ: 12464, ケ: 12465, ゲ: 12466, コ: 12467, ゴ: 12468, サ: 12469
 * ザ: 12470, シ: 12471, ジ: 12472, ス: 12473, ズ: 12474, セ: 12475, ゼ: 12476
 * ソ: 12477, ゾ: 12478, タ: 12479, ダ: 12480, チ: 12481, ヂ: 12482, ッ: 12483
 * ツ: 12484, ヅ: 12485, テ: 12486, デ: 12487, ト: 12488, ド: 12489, ナ: 12490
 * ニ: 12491, ヌ: 12492, ネ: 12493, ノ: 12494, ハ: 12495, バ: 12496, パ: 12497
 * ヒ: 12498, ビ: 12499, ピ: 12500, フ: 12501, ブ: 12502, プ: 12503, ヘ: 12504
 * ベ: 12505, ペ: 12506, ホ: 12507, ボ: 12508, ポ: 12509, マ: 12510, ミ: 12511
 * ム: 12512, メ: 12513, モ: 12514, ャ: 12515, ヤ: 12516, ュ: 12517, ユ: 12518
 * ョ: 12519, ヨ: 12520, ラ: 12521, リ: 12522, ル: 12523, レ: 12524, ロ: 12525
 * ヮ: 12526, ワ: 12527, ヰ: 12528, ヱ: 12529, ヲ: 12530, ン: 12531, ヴ: 12532
 *
 * @param {string} raw - Text that may contain combining (han)dakuten marks.
 * @returns {string} The text with mergeable pairs replaced by precomposed
 *   kana. Empty and non-string input is returned unchanged.
 */
const replaceDakuten = (raw) => {
  if (typeof raw !== 'string' || raw.length === 0) {
    return raw;
  }

  const pieces = [];
  // Code of the last character pushed to `pieces`; -1 means "nothing yet",
  // so a mark at the start of the string has no base and is kept verbatim.
  let lastCode = -1;

  for (let i = 0; i < raw.length; i += 1) {
    const code = raw.charCodeAt(i);

    let offset = 0;
    if (code === DAKUTEN_CODE && DAKUTEN_BASE_CODES.has(lastCode)) {
      // Dakuten handling: the voiced kana sits right after its base.
      offset = 1;
    } else if (code === HAN_DAKUTEN_CODE && HAN_DAKUTEN_BASE_CODES.has(lastCode)) {
      // Handakuten handling: the semi-voiced kana sits two after its base.
      offset = 2;
    }

    if (offset > 0) {
      // Swap the base kana for its precomposed form and drop the mark.
      // The merged code is never a base itself, so a second mark is kept.
      lastCode += offset;
      pieces[pieces.length - 1] = String.fromCharCode(lastCode);
    } else {
      lastCode = code;
      pieces.push(raw[i]);
    }
  }

  return pieces.join('');
};

export default replaceDakuten;
@riverleo
Copy link
Author

import _ from 'lodash';
import replaceDakuten, {
  DAKUTEN_CODE,
  HAN_DAKUTEN_CODE,
} from '../replaceDakuten';


// Spec for replaceDakuten: kana that can take a (han)dakuten must be merged
// into the precomposed character, every other kana + mark pair must survive
// unchanged.
describe('replaceDakuten.js', () => {
  // Appends the combining mark with char code `markCode` after every
  // character of `kana` and returns the resulting string.
  const attachMark = (kana, markCode) => {
    const mark = String.fromCharCode(markCode);
    return _.join(_.map(kana, s => `${s}${mark}`), '');
  };

  // Hiragana that can take a dakuten.
  it('탁점이 붙을 수 있는 히라가나인 붙은 경우', () => {
    const target = attachMark('かきくけこさしすせそたちつてとはひふへほ', DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe('がぎぐげござじずぜぞだぢづでどばびぶべぼ');
  });

  // Hiragana that cannot take a dakuten.
  it('탁점이 붙을 수 없는 히라가나인 경우', () => {
    const target = attachMark('えぉおがぎぐげござじずぜぞだぢっづでどなにぬねのばぱびぴぶぷべぺぼぽまみ', DAKUTEN_CODE);

    // The function output is the actual value, the untouched input the expected one.
    expect(replaceDakuten(target)).toBe(target);
  });

  // Hiragana that can take a handakuten.
  it('반탁점이 붙을 수 있는 히라가나인 경우', () => {
    const target = attachMark('はひふへほ', HAN_DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe('ぱぴぷぺぽ');
  });

  // Hiragana that cannot take a handakuten.
  it('반탁점이 붙을 수 없는 히라가나인 경우', () => {
    const target = attachMark('えぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのばぱびぴぶぷべぺぼぽまみ', HAN_DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe(target);
  });

  // Katakana that can take a dakuten.
  it('탁점이 붙을 수 있는 가타가나인 경우', () => {
    const target = attachMark('カキクケコサシスセソタチツテトハヒフヘホ', DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe('ガギグゲゴザジズゼゾダヂヅデドバビブベボ');
  });

  // Katakana that cannot take a dakuten.
  it('탁점이 붙을 수 없는 가타가나인 경우', () => {
    const target = attachMark('ァアィイゥウェエォオガギグゲゴザジズゼゾダヂッヅデドナニヌネノバパビピブプベペボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴ', DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe(target);
  });

  // Katakana that can take a handakuten.
  it('반탁점이 붙을 수 있는 가타가나인 경우', () => {
    const target = attachMark('ハヒフヘホ', HAN_DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe('パピプペポ');
  });

  // Katakana that cannot take a handakuten.
  it('반탁점이 붙을 수 없는 가타가나인 경우', () => {
    const target = attachMark('ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノバパビピブプベペボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴ', HAN_DAKUTEN_CODE);

    expect(replaceDakuten(target)).toBe(target);
  });
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment