Skip to content

Instantly share code, notes, and snippets.

@oozoofrog
Last active November 13, 2022 10:49
Show Gist options
  • Save oozoofrog/07d7eec63750c4992b09e2709f180497 to your computer and use it in GitHub Desktop.
Save oozoofrog/07d7eec63750c4992b09e2709f180497 to your computer and use it in GitHub Desktop.
한글 유니코드 다루기
import Cocoa
// Sample string mixing Hangul syllables, Latin letters, digits and a compatibility jamo.
// NOTE(review): nothing in the visible code reads or mutates `str` — confirm it is still needed.
var str = "궉토abcd스234꾹타ㅎ하후훼의"
extension Collection {
    /// The elements of this collection copied, in iteration order, into a new array.
    var toArray: [Element] {
        let elements = [Element](self)
        return elements
    }
}
/// A value that can be expressed as a single `Unicode.Scalar`.
///
/// The conformances declared in this file (`UInt32`, `UInt16`, `Int`) treat the
/// receiver as a code point and fall back to U+0000 when it is not a valid scalar.
protocol UnicodeScalarCreatable {
/// The receiver converted to a Unicode scalar.
var toUnicodeScalar: Unicode.Scalar { get }
}
extension UInt32: UnicodeScalarCreatable {
    /// The scalar whose code point equals this value, or U+0000 when the value
    /// is not a valid Unicode scalar (for example a surrogate code point).
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            // Invalid code point: fall back to NUL.
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}
extension UInt16: UnicodeScalarCreatable {
    /// The scalar whose code point equals this value, or U+0000 when the value
    /// is not a valid Unicode scalar (for example a surrogate code unit).
    var toUnicodeScalar: Unicode.Scalar {
        if let scalar = Unicode.Scalar(self) {
            return scalar
        }
        // Invalid code point: fall back to NUL.
        return Unicode.Scalar(UInt8(0))
    }
}
extension Int: UnicodeScalarCreatable {
    /// The scalar whose code point equals this value, or U+0000 when the value
    /// cannot be converted to a Unicode scalar.
    var toUnicodeScalar: Unicode.Scalar {
        switch Unicode.Scalar(self) {
        case let scalar?:
            return scalar
        case nil:
            // Invalid code point: fall back to NUL.
            return Unicode.Scalar(UInt8(0))
        }
    }
}
/// Makes `Unicode.Scalar` strideable by code point so that ranges of scalars
/// can be iterated and turned into arrays.
extension Unicode.Scalar: Strideable {
    public typealias Stride = Int32

    /// The signed number of code points from this scalar to `other`.
    ///
    /// - Parameter other: The scalar to measure the distance to.
    /// - Returns: `other.value - self.value` as a signed 32-bit integer.
    public func distance(to other: Unicode.Scalar) -> Stride {
        return Int32(other.value) - Int32(self.value)
    }

    /// The scalar `n` code points away from this one.
    ///
    /// - Parameter n: The signed offset, in code points.
    /// - Returns: The scalar at `value + n`, or `self` when that position is
    ///   negative, too large, or not a valid Unicode scalar (e.g. a surrogate).
    public func advanced(by n: Int32) -> Unicode.Scalar {
        // Widen to Int64 so negative offsets and overflow cannot trap; the
        // previous `value + UInt32(n)` crashed for every negative `n`.
        let target = Int64(value) + Int64(n)
        guard let codePoint = UInt32(exactly: target),
              let scalar = Unicode.Scalar(codePoint) else {
            // NOTE(review): returning `self` means a stride that reaches the
            // surrogate gap (U+D800...U+DFFF) never moves past it.
            return self
        }
        return scalar
    }

    /// This scalar wrapped in a single-scalar `Character`.
    var toCharacter: Character { return Character(self) }
}
/// Unicode ranges, conversion tables and classification helpers for Hangul.
///
/// Three representations are covered: conjoining jamo (produced by
/// decomposition), Hangul compatibility jamo, and precomposed syllables.
class KoreanUnicode {
    /// Conjoining (NFD) initial consonants, U+1100...U+1112:
    /// ᄀ, ᄁ, ᄂ, ᄃ, ᄄ, ᄅ, ᄆ, ᄇ, ᄈ, ᄉ, ᄊ, ᄋ, ᄌ, ᄍ, ᄎ, ᄏ, ᄐ, ᄑ, ᄒ
    let initialConsonant: ClosedRange<Unicode.Scalar> = "\u{1100}"..."\u{1112}"

    /// Conjoining (NFD) medial vowels, U+1161...U+1175:
    /// ᅡ, ᅢ, ᅣ, ᅤ, ᅥ, ᅦ, ᅧ, ᅨ, ᅩ, ᅪ, ᅫ, ᅬ, ᅭ, ᅮ, ᅯ, ᅰ, ᅱ, ᅲ, ᅳ, ᅴ, ᅵ
    let medial: ClosedRange<Unicode.Scalar> = "\u{1161}"..."\u{1175}"

    /// Conjoining (NFD) final consonants, U+11A8...U+11C2:
    /// ᆨ, ᆩ, ᆪ, ᆫ, ᆬ, ᆭ, ᆮ, ᆯ, ᆰ, ᆱ, ᆲ, ᆳ, ᆴ, ᆵ, ᆶ, ᆷ, ᆸ, ᆹ, ᆺ, ᆻ, ᆼ, ᆽ, ᆾ, ᆿ, ᇀ, ᇁ, ᇂ
    ///
    /// Linguistically meaningful values exist beyond this range, but they were
    /// judged unlikely to be used in Toss, so the range stops here.
    let finalConsonant: ClosedRange<Unicode.Scalar> = "\u{11A8}"..."\u{11C2}"

    /// Hangul compatibility jamo, U+3131...U+3163.
    let koreanCompatibilityJamo: ClosedRange<Unicode.Scalar> = "\u{3131}"..."\u{3163}"

    /// Precomposed (NFC) Hangul syllables, U+AC00...U+D7A3.
    let koreanOfNFC: ClosedRange<Unicode.Scalar> = "\u{AC00}"..."\u{D7A3}"

    /// Every scalar of the five ranges above, in declaration order
    /// (conjoining jamo, compatibility jamo, then precomposed syllables).
    ///
    /// Kept for callers that read the list; `isKorean(_:)` tests the ranges
    /// directly instead of scanning this array, so it is only built on demand.
    private(set) lazy var korean: [Unicode.Scalar] = KoreanUnicode.scalars(in: self.initialConsonant)
        + KoreanUnicode.scalars(in: self.medial)
        + KoreanUnicode.scalars(in: self.finalConsonant)
        + KoreanUnicode.scalars(in: self.koreanCompatibilityJamo)
        + KoreanUnicode.scalars(in: self.koreanOfNFC)

    /// Maps each conjoining initial consonant to its compatibility jamo.
    let compatibilityInitialConsonantConvertTable: [Character: Character] = [
        "ᄀ": "ㄱ",
        "ᄁ": "ㄲ",
        "ᄂ": "ㄴ",
        "ᄃ": "ㄷ",
        "ᄄ": "ㄸ",
        "ᄅ": "ㄹ",
        "ᄆ": "ㅁ",
        "ᄇ": "ㅂ",
        "ᄈ": "ㅃ",
        "ᄉ": "ㅅ",
        "ᄊ": "ㅆ",
        "ᄋ": "ㅇ",
        "ᄌ": "ㅈ",
        "ᄍ": "ㅉ",
        "ᄎ": "ㅊ",
        "ᄏ": "ㅋ",
        "ᄐ": "ㅌ",
        "ᄑ": "ㅍ",
        "ᄒ": "ㅎ"
    ]

    /// Maps each conjoining medial vowel to its compatibility jamo.
    let compatibilityMedialConvertTable: [Character: Character] = [
        "ᅡ": "ㅏ",
        "ᅢ": "ㅐ",
        "ᅣ": "ㅑ",
        "ᅤ": "ㅒ",
        "ᅥ": "ㅓ",
        "ᅦ": "ㅔ",
        "ᅧ": "ㅕ",
        "ᅨ": "ㅖ",
        "ᅩ": "ㅗ",
        "ᅪ": "ㅘ",
        "ᅫ": "ㅙ",
        "ᅬ": "ㅚ",
        "ᅭ": "ㅛ",
        "ᅮ": "ㅜ",
        "ᅯ": "ㅝ",
        "ᅰ": "ㅞ",
        "ᅱ": "ㅟ",
        "ᅲ": "ㅠ",
        "ᅳ": "ㅡ",
        "ᅴ": "ㅢ",
        "ᅵ": "ㅣ"
    ]

    /// Maps each conjoining final consonant to its compatibility jamo.
    let compatibilityFinalConsonantConvertTable: [Character: Character] = [
        "ᆨ": "ㄱ",
        "ᆩ": "ㄲ",
        "ᆪ": "ㄳ",
        "ᆫ": "ㄴ",
        "ᆬ": "ㄵ",
        "ᆭ": "ㄶ",
        "ᆮ": "ㄷ",
        "ᆯ": "ㄹ",
        "ᆰ": "ㄺ",
        "ᆱ": "ㄻ",
        "ᆲ": "ㄼ",
        "ᆳ": "ㄽ",
        "ᆴ": "ㄾ",
        "ᆵ": "ㄿ",
        "ᆶ": "ㅀ",
        "ᆷ": "ㅁ",
        "ᆸ": "ㅂ",
        "ᆹ": "ㅄ",
        "ᆺ": "ㅅ",
        "ᆻ": "ㅆ",
        "ᆼ": "ㅇ",
        "ᆽ": "ㅈ",
        "ᆾ": "ㅊ",
        "ᆿ": "ㅋ",
        "ᇀ": "ㅌ",
        "ᇁ": "ㅍ",
        "ᇂ": "ㅎ"
    ]

    /// Expands a scalar range into an array by walking its code points.
    private static func scalars(in range: ClosedRange<Unicode.Scalar>) -> [Unicode.Scalar] {
        return (range.lowerBound.value...range.upperBound.value).compactMap { Unicode.Scalar($0) }
    }

    /// Whether `unicodeScalar` is a conjoining initial consonant.
    func isInitialConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return initialConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` is a conjoining medial vowel.
    func isMedial(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return medial.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` is a conjoining final consonant.
    func isFinalConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return finalConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` lies in any of the Hangul ranges of this class.
    func isKorean(_ unicodeScalar: Unicode.Scalar) -> Bool {
        // Five constant-time range checks instead of a linear search through
        // the 11,290-element `korean` array.
        return initialConsonant.contains(unicodeScalar)
            || medial.contains(unicodeScalar)
            || finalConsonant.contains(unicodeScalar)
            || koreanCompatibilityJamo.contains(unicodeScalar)
            || koreanOfNFC.contains(unicodeScalar)
    }

    /// Whether every scalar that makes up `character` is Hangul.
    func isKorean(_ character: Character) -> Bool {
        return character.unicodeScalars.allSatisfy { isKorean($0) }
    }

    /// Returns `true` when `string` contains only Hangul, `false` otherwise.
    ///
    /// The string is compatibility-decomposed before its scalars are tested.
    /// An empty string yields `true` because there is no scalar to reject.
    func isKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .allSatisfy { isKorean($0) }
    }

    /// Returns `true` when `string` contains at least one Hangul scalar after
    /// compatibility decomposition, `false` when it contains none.
    func hasKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .contains { isKorean($0) }
    }

    /// Converts a conjoining jamo character to its compatibility jamo.
    ///
    /// - Parameter character: The character to convert.
    /// - Returns: The table entry for `character`, looked up in the initial,
    ///   medial and final tables in that order, or `character` itself when no
    ///   table contains it.
    func koreanCompatibilityJamoCharacterFromNFDCharacter(_ character: Character) -> Character {
        return compatibilityInitialConsonantConvertTable[character]
            ?? compatibilityMedialConvertTable[character]
            ?? compatibilityFinalConsonantConvertTable[character]
            ?? character
    }
}
// Shared `KoreanUnicode` instance used by the `String` helpers and the demo code in this file.
let korean = KoreanUnicode()
/// Hangul convenience helpers backed by the shared `korean` instance.
extension String {
    /// `true` when the string contains only Hangul (see `KoreanUnicode.isKorean(_:)`).
    var isKorean: Bool {
        return korean.isKorean(self)
    }

    /// `true` when the string contains at least one Hangul scalar.
    var hasKorean: Bool {
        return korean.hasKorean(self)
    }

    /// The string with every character that is not entirely Hangul removed.
    var koreanOnly: String {
        return filter { korean.isKorean($0) }
    }

    /// The conjoining initial consonants found in the compatibility-decomposed
    /// string, concatenated in their original order.
    var koreanInitialConsonantOnly: String {
        var initials = String.UnicodeScalarView()
        for scalar in decomposedStringWithCompatibilityMapping.unicodeScalars
        where korean.isInitialConsonant(scalar) {
            initials.append(scalar)
        }
        return String(initials)
    }

    /// The scalars of the compatibility-decomposed (NFKD) form of the string.
    func decomposedUnicodeScalars() -> [Unicode.Scalar] {
        return Array(decomposedStringWithCompatibilityMapping.unicodeScalars)
    }

    /// The compatibility-decomposed (NFKD) form of the string.
    func decomposed() -> String {
        // Splitting the decomposed string into scalars and joining them again
        // reproduces the same scalar sequence, so return it directly.
        return decomposedStringWithCompatibilityMapping
    }

    /// The string with every conjoining jamo replaced by its compatibility
    /// jamo; scalars without a table entry are kept unchanged.
    var toKoreanCompatiblityJamo: String {
        // Convert scalar by scalar. A decomposed syllable is still a single
        // `Character` (one grapheme cluster), so mapping over characters never
        // matched the single-jamo table keys and left syllables unconverted.
        let converted = decomposedStringWithCompatibilityMapping.unicodeScalars.map {
            korean.koreanCompatibilityJamoCharacterFromNFDCharacter(Character($0))
        }
        return String(converted)
    }
}
// Demo: compare a mixed-script string with its decomposed form.
let a = "안녕하세요.뿡뿡뿡helloおはよう宜しくね脳"
let b = a.decomposed()

// Print each string one Unicode scalar at a time.
for text in [a, b] {
    print(text.unicodeScalars.map { Character($0) })
}

// Whether `String` equality treats the original and decomposed forms as equal.
print(a == b)

// Convert every scalar of the decomposed form through the jamo tables and print the result.
print(String(b.unicodeScalars.map { korean.koreanCompatibilityJamoCharacterFromNFDCharacter(Character($0)) }))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment