Last active
January 20, 2016 20:17
-
-
Save foriequal0/ceda3a2c474f3699172d to your computer and use it in GitHub Desktop.
한글 자소 분리 조합
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Hangul jaso (letter) splitting and joining.
 *
 * Converts between precomposed Hangul syllables (U+AC00..U+D7A3) and the
 * Hangul Compatibility Jamo letters listed in the tables below.
 *
 * Exposes:
 *   split(text)                -> flat array of single-character strings
 *   join(jaso, more)           -> recomposed characters (array, or string if `more`)
 *   split_strict(text)         -> array of per-character jaso arrays
 *   join_strict(groups, more)  -> recomposed groups (array, or string if `more`)
 */
var hangul = (function() {
  // First and last precomposed syllables handled by this module.
  var SYLLABLE_FIRST = 0xAC00;
  var SYLLABLE_LAST = 0xD7A3;
  // Number of medial vowels and of final-consonant slots (including "none").
  var JUNG_COUNT = 21;
  var JONG_COUNT = 28;

  // Initial consonants, in syllable-composition order.
  var ChoSeong = [
    0x3131, 0x3132, 0x3134, 0x3137, 0x3138,
    0x3139, 0x3141, 0x3142, 0x3143, 0x3145,
    0x3146, 0x3147, 0x3148, 0x3149, 0x314a,
    0x314b, 0x314c, 0x314d, 0x314e
  ];

  // Medial vowels, in syllable-composition order.
  var JungSeong = [
    0x314f, 0x3150, 0x3151, 0x3152, 0x3153,
    0x3154, 0x3155, 0x3156, 0x3157, 0x3158,
    0x3159, 0x315a, 0x315b, 0x315c, 0x315d,
    0x315e, 0x315f, 0x3160, 0x3161, 0x3162,
    0x3163
  ];

  // Final consonants, in syllable-composition order.
  // Index 0 (0x0000) is a placeholder meaning "no final consonant".
  var JongSeong = [
    0x0000, 0x3131, 0x3132, 0x3133, 0x3134,
    0x3135, 0x3136, 0x3137, 0x3139, 0x313a,
    0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
    0x3140, 0x3141, 0x3142, 0x3144, 0x3145,
    0x3146, 0x3147, 0x3148, 0x314a, 0x314b,
    0x314c, 0x314d, 0x314e
  ];

  /** Appends the one-character string for `charCode` to `array`. */
  var pushCharCode = function(array, charCode) {
    array.push(String.fromCharCode(charCode));
  };

  /**
   * Breaks one UTF-16 code unit into jaso code units.
   *
   * @param {number} ch - Code unit to decompose.
   * @returns {number[]} Initial, medial and (if present) final jamo codes for
   *   a precomposed syllable; otherwise a one-element array holding `ch`.
   */
  var decompose = function(ch) {
    if (ch < SYLLABLE_FIRST || ch > SYLLABLE_LAST) {
      return [ch];
    }
    var offset = ch - SYLLABLE_FIRST;
    var choIndex = Math.floor(offset / (JUNG_COUNT * JONG_COUNT));
    var jungIndex = Math.floor((offset % (JUNG_COUNT * JONG_COUNT)) / JONG_COUNT);
    var jongIndex = offset % JONG_COUNT;
    var codes = [ChoSeong[choIndex], JungSeong[jungIndex]];
    if (jongIndex !== 0) {
      codes.push(JongSeong[jongIndex]);
    }
    return codes;
  };

  /** Builds the precomposed syllable code from the three table indices. */
  var compose = function(choIndex, jungIndex, jongIndex) {
    return SYLLABLE_FIRST +
      (choIndex * JUNG_COUNT + jungIndex) * JONG_COUNT +
      jongIndex;
  };

  /**
   * Splits every Hangul syllable in `text` into its jaso.
   *
   * @param {string} text - Input text; non-syllable code units pass through.
   * @returns {string[]} Flat array of single-character strings.
   */
  var split = function(text) {
    var v = [];
    for (var i = 0; i < text.length; i++) {
      var codes = decompose(text.charCodeAt(i));
      for (var j = 0; j < codes.length; j++) {
        pushCharCode(v, codes[j]);
      }
    }
    return v;
  };

  /**
   * Recomposes a flat jaso sequence into syllables.
   *
   * A consonant that follows initial+medial is taken as the final consonant;
   * if a vowel comes next and that consonant can also be an initial, it is
   * moved to start the next syllable instead. Jaso that cannot extend the
   * pending syllable are emitted unchanged rather than composed.
   *
   * @param {string[]|string} jaso - Sequence of characters; only the first
   *   code unit of each element is read.
   * @param {boolean} [more] - When truthy, return one string instead of an
   *   array of single-character strings.
   * @returns {string[]|string} The recomposed text.
   */
  var join = function(jaso, more) {
    var res = [];
    var cho = null;  // code of the pending initial consonant, or null
    var jung = -1;   // JungSeong index of the pending vowel, or -1
    var jong = null; // code of the pending final consonant, or null

    // Emits whatever is pending (bare consonant or composed syllable).
    var flush = function() {
      if (cho !== null) {
        if (jung < 0) {
          pushCharCode(res, cho);
        } else {
          var jongIndex = jong === null ? 0 : JongSeong.indexOf(jong);
          pushCharCode(res, compose(ChoSeong.indexOf(cho), jung, jongIndex));
        }
      }
      cho = null;
      jung = -1;
      jong = null;
    };

    for (var i = 0; i < jaso.length; i++) {
      var ch = jaso[i].charCodeAt(0);
      var jungIndex = JungSeong.indexOf(ch);
      var isCho = ChoSeong.indexOf(ch) >= 0;
      // Index 0 is the "no final" placeholder, so it does not count.
      var isJong = JongSeong.indexOf(ch) > 0;

      if (jungIndex >= 0) {
        if (cho !== null && jung < 0) {
          // initial + (medial)
          jung = jungIndex;
        } else if (jong !== null && ChoSeong.indexOf(jong) >= 0) {
          // initial + medial + consonant + (medial): the consonant belongs
          // to the next syllable as its initial.
          var carried = jong;
          jong = null;
          flush();
          cho = carried;
          jung = jungIndex;
        } else {
          // No consonant available to carry this vowel.
          flush();
          pushCharCode(res, ch);
        }
      } else if (isCho || isJong) {
        if (cho !== null && jung >= 0 && jong === null && isJong) {
          // initial + medial + (final)
          jong = ch;
        } else {
          flush();
          if (isCho) {
            cho = ch;
          } else {
            // Final-only cluster cannot start a syllable.
            pushCharCode(res, ch);
          }
        }
      } else {
        // Not a jaso handled by the tables: pass through.
        flush();
        pushCharCode(res, ch);
      }
    }
    flush();

    return more ? res.join("") : res;
  };

  /**
   * Like `split`, but keeps the jaso of each input code unit grouped.
   *
   * @param {string} text - Input text.
   * @returns {string[][]} One array of single-character strings per code
   *   unit of `text`.
   */
  var split_strict = function(text) {
    var v = [];
    for (var i = 0; i < text.length; i++) {
      var codes = decompose(text.charCodeAt(i));
      var tmp = [];
      for (var j = 0; j < codes.length; j++) {
        pushCharCode(tmp, codes[j]);
      }
      v.push(tmp);
    }
    return v;
  };

  /**
   * Recomposes the grouped output of `split_strict`.
   *
   * @param {string[][]} strict_jaso - Groups of jaso; each group is joined
   *   independently with `join`.
   * @param {boolean} [more] - When truthy, return one string instead of an
   *   array with one string per group.
   * @returns {string[]|string} The recomposed text.
   */
  var join_strict = function(strict_jaso, more) {
    var res = [];
    for (var i = 0; i < strict_jaso.length; i++) {
      res.push(join(strict_jaso[i], true));
    }
    return more ? res.join("") : res;
  };

  return {
    split: split,
    join: join,
    split_strict: split_strict,
    join_strict: join_strict
  };
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment