Skip to content

Instantly share code, notes, and snippets.

@guersam
Last active October 18, 2018 08:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guersam/ce77eac6ef0c393983f53ab3d3f92f25 to your computer and use it in GitHub Desktop.
Save guersam/ce77eac6ef0c393983f53ab3d3f92f25 to your computer and use it in GitHub Desktop.
한글 유니코드 자소 분리
object KoreanJasoDecomposer {

  /**
   * A Hangul syllable split into its constituent jamo.
   *
   * @param onset   initial consonant, if any
   * @param nucleus medial vowel, if any
   * @param coda    final consonant; `None` when the syllable has no final consonant
   */
  case class Syllable(
    onset: Option[Char],
    nucleus: Option[Char],
    coda: Option[Char]
  )

  /**
   * Decomposes a precomposed Unicode Hangul syllable into its jamo.
   *
   * Only characters in the precomposed syllable range (U+AC00..U+D7A3) can be
   * decomposed. Any other character yields a `Syllable` whose three fields are
   * all `None`, instead of jamo derived from meaningless index arithmetic.
   *
   * @param syllable the character to decompose
   * @return the onset, nucleus and (optional) coda looked up from the jamo tables
   */
  def decompose(syllable: Char): Syllable = {
    val code = syllable.toInt
    if (code < BaseOffset || code > LastSyllable) NotASyllable
    else {
      // Syllables are laid out as ((onset * 21) + nucleus) * 28 + coda from BaseOffset,
      // so the three indices fall out of plain division and remainder.
      val base       = code - BaseOffset
      val onsetIdx   = base / OnsetOffset
      val nucleusIdx = (base % OnsetOffset) / NucleusOffset
      val codaIdx    = base % NucleusOffset
      Syllable(
        onset = safeGet(OnsetChars, onsetIdx),
        nucleus = safeGet(NucleusChars, nucleusIdx),
        // Index 0 of the coda table is the "no final consonant" placeholder.
        coda = if (codaIdx == 0) None else safeGet(CodaChars, codaIdx)
      )
    }
  }

  /**
   * Extracts the initial consonant of a Hangul syllable.
   *
   * @param syllable the character to inspect
   * @return the onset, or `None` when `syllable` is not a precomposed Hangul syllable
   */
  def onset(syllable: Char): Option[Char] = decompose(syllable).onset

  /**
   * Extracts the medial vowel of a Hangul syllable.
   *
   * @param syllable the character to inspect
   * @return the nucleus, or `None` when `syllable` is not a precomposed Hangul syllable
   */
  def nucleus(syllable: Char): Option[Char] = decompose(syllable).nucleus

  /**
   * Extracts the final consonant of a Hangul syllable.
   *
   * @param syllable the character to inspect
   * @return the coda, or `None` when the syllable has no final consonant or
   *         `syllable` is not a precomposed Hangul syllable
   */
  def coda(syllable: Char): Option[Char] = decompose(syllable).coda

  // First precomposed Hangul syllable (U+AC00).
  private val BaseOffset = 44032
  // Last precomposed Hangul syllable (U+D7A3).
  private val LastSyllable = 55203
  // Number of syllables sharing one onset: 21 nuclei * 28 codas.
  private val OnsetOffset = 588
  // Number of syllables sharing one onset/nucleus pair: 28 codas (including "none").
  private val NucleusOffset = 28

  // Result returned for characters outside the precomposed syllable range.
  private val NotASyllable = Syllable(None, None, None)

  private val OnsetChars = Array('ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ',
    'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ')

  private val NucleusChars = Array('ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ',
    'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ')

  private val CodaChars =
    Array(' ', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ',
      'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ')

  // Bounds-checked array lookup; kept as a defensive net behind the range guard in decompose.
  private def safeGet(arr: Array[Char], idx: Int): Option[Char] =
    if (arr.indices contains idx) Some(arr(idx))
    else None
}
import cats.implicits._
object KoreanPostfixer {

  /**
   * Appends the particle "로" or "으로" to a word.
   *
   * The choice depends on the coda that `KoreanJasoDecomposer.coda` reports for
   * the last character of `body`: no coda or a 'ㄹ' coda takes "로", any other
   * coda takes "으로". An empty `body` produces the empty string.
   *
   * {{{
   * import KoreanPostfixer.ops._
   *
   * assert("고양이".`-(으)로` === "고양이로")
   * assert("고양삼".`-(으)로` === "고양삼으로")
   * }}}
   *
   * @param body the word the particle is attached to
   * @return `body` followed by "로" or "으로"
   */
  def `-(으)로`(body: String): String =
    body.lastOption.foldMap { last =>
      val particle = KoreanJasoDecomposer.coda(last) match {
        case Some(jaso) if jaso != 'ㄹ' => "으로"
        case _                          => "로"
      }
      body + particle
    }

  /** Extension syntax: bring into scope with `import KoreanPostfixer.ops._`. */
  object ops {

    /** Adds `-(으)로` as a method on `String`, delegating to `KoreanPostfixer`. */
    implicit class KoreanPostfixerOps(val str: String) extends AnyVal {
      def `-(으)로`: String = KoreanPostfixer.`-(으)로`(str)
    }
  }
}
import org.scalatest.FunSuite
/** Checks the "로" / "으로" particle selection exposed through `KoreanPostfixer.ops`. */
class KoreanPostfixerTest extends FunSuite {
  import KoreanPostfixer.ops._

  test("-(으)로") {
    // Each pair is (input word, expected word with particle attached):
    // a 'ㄹ' coda, no coda, and a non-'ㄹ' coda respectively.
    val expectations = Seq(
      "고양일" -> "고양일로",
      "고양이" -> "고양이로",
      "고양삼" -> "고양삼으로"
    )
    expectations.foreach { case (body, expected) =>
      assert(body.`-(으)로` === expected)
    }
  }
}
@guersam
Copy link
Author

guersam commented Oct 8, 2018

자모? 자소?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment