Last active
November 13, 2022 10:49
-
-
Save oozoofrog/07d7eec63750c4992b09e2709f180497 to your computer and use it in GitHub Desktop.
한글 유니코드 다루기
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Cocoa | |
// Sample input mixing Hangul syllables, Latin letters, digits and a lone compatibility jamo.
var str: String = "궉토abcd스234꾹타ㅎ하후훼의"
extension Collection {
    /// The elements of this collection, copied in iteration order into a new array.
    var toArray: [Element] {
        return map { $0 }
    }
}
/// An integer type whose value can be interpreted as a Unicode code point.
protocol UnicodeScalarCreatable {
    /// The scalar whose code point equals this value.
    ///
    /// Conformers in this file return U+0000 when the value is not a valid
    /// scalar (a surrogate, a negative number, or a value above U+10FFFF).
    var toUnicodeScalar: Unicode.Scalar { get }
}

extension UInt32: UnicodeScalarCreatable {
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}

extension UInt16: UnicodeScalarCreatable {
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}

extension Int: UnicodeScalarCreatable {
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}
/// Makes `Unicode.Scalar` strideable by code point so that ranges of scalars
/// can be iterated and converted to arrays.
extension Unicode.Scalar: Strideable {
    public typealias Stride = Int32

    /// The signed number of code points from this scalar to `other`.
    public func distance(to other: Unicode.Scalar) -> Stride {
        // Scalar values never exceed 0x10FFFF, so both conversions fit in Int32.
        return Int32(other.value) - Int32(value)
    }

    /// The scalar `n` code points away from this one.
    ///
    /// Returns `self` unchanged when the target is not a valid scalar: below
    /// U+0000, above U+10FFFF, or inside the surrogate range.
    public func advanced(by n: Int32) -> Unicode.Scalar {
        // Work in Int64 so a negative stride or a large sum cannot trap; the
        // previous `UInt32(n)` conversion crashed for every negative `n`.
        let target = Int64(value) + Int64(n)
        guard let codePoint = UInt32(exactly: target),
              let scalar = Unicode.Scalar(codePoint) else {
            return self
        }
        return scalar
    }

    /// This scalar wrapped in a single-scalar `Character`.
    var toCharacter: Character { return Character(self) }
}
/// Classifies Unicode scalars as Hangul and converts conjoining jamo (as
/// produced by decomposition) into Hangul Compatibility Jamo.
class KoreanUnicode {
    /// Unicode values of the initial consonants (choseong) as they appear in NFD text.
    /// ᄀ, ᄁ, ᄂ, ᄃ, ᄄ, ᄅ, ᄆ, ᄇ, ᄈ, ᄉ, ᄊ, ᄋ, ᄌ, ᄍ, ᄎ, ᄏ, ᄐ, ᄑ, ᄒ
    private(set) lazy var initialConsonant: ClosedRange<Unicode.Scalar> = 0x1100.toUnicodeScalar...0x1112.toUnicodeScalar

    /// Unicode values of the medial vowels (jungseong) as they appear in NFD text.
    /// ᅡ, ᅢ, ᅣ, ᅤ, ᅥ, ᅦ, ᅧ, ᅨ, ᅩ, ᅪ, ᅫ, ᅬ, ᅭ, ᅮ, ᅯ, ᅰ, ᅱ, ᅲ, ᅳ, ᅴ, ᅵ
    private(set) lazy var medial: ClosedRange<Unicode.Scalar> = 0x1161.toUnicodeScalar...0x1175.toUnicodeScalar

    /// Unicode values of the final consonants (jongseong) as they appear in NFD text.
    /// Code points beyond this range also carry linguistic meaning, but they were
    /// judged unlikely to be needed by the app (Toss), so the range stops here.
    /// ᆨ, ᆩ, ᆪ, ᆫ, ᆬ, ᆭ, ᆮ, ᆯ, ᆰ, ᆱ, ᆲ, ᆳ, ᆴ, ᆵ, ᆶ, ᆷ, ᆸ, ᆹ, ᆺ, ᆻ, ᆼ, ᆽ, ᆾ, ᆿ, ᇀ, ᇁ, ᇂ
    private(set) lazy var finalConsonant: ClosedRange<Unicode.Scalar> = 0x11A8.toUnicodeScalar...0x11C2.toUnicodeScalar

    /// The Hangul Compatibility Jamo range.
    private(set) lazy var koreanCompatibilityJamo: ClosedRange<Unicode.Scalar> = 0x3131.toUnicodeScalar...0x3163.toUnicodeScalar

    /// The precomposed (NFC) Hangul syllable range.
    private(set) lazy var koreanOfNFC: ClosedRange<Unicode.Scalar> = 0xAC00.toUnicodeScalar...0xD7A3.toUnicodeScalar

    /// Every scalar from the NFD jamo ranges, the compatibility jamo range and
    /// the NFC syllable range, in that order.
    ///
    /// Kept for callers that want the full list. Membership tests use the
    /// ranges directly rather than scanning this array.
    private(set) lazy var korean: [Unicode.Scalar] = Array(initialConsonant) + Array(medial) + Array(finalConsonant) + Array(koreanCompatibilityJamo) + Array(koreanOfNFC)

    /// Maps each conjoining initial consonant to its compatibility jamo.
    private(set) lazy var compatibilityInitialConsonantConvertTable: [Character: Character] =
        [Character("ᄀ"): Character("ㄱ"),
         Character("ᄁ"): Character("ㄲ"),
         Character("ᄂ"): Character("ㄴ"),
         Character("ᄃ"): Character("ㄷ"),
         Character("ᄄ"): Character("ㄸ"),
         Character("ᄅ"): Character("ㄹ"),
         Character("ᄆ"): Character("ㅁ"),
         Character("ᄇ"): Character("ㅂ"),
         Character("ᄈ"): Character("ㅃ"),
         Character("ᄉ"): Character("ㅅ"),
         Character("ᄊ"): Character("ㅆ"),
         Character("ᄋ"): Character("ㅇ"),
         Character("ᄌ"): Character("ㅈ"),
         Character("ᄍ"): Character("ㅉ"),
         Character("ᄎ"): Character("ㅊ"),
         Character("ᄏ"): Character("ㅋ"),
         Character("ᄐ"): Character("ㅌ"),
         Character("ᄑ"): Character("ㅍ"),
         Character("ᄒ"): Character("ㅎ")]

    /// Maps each conjoining medial vowel to its compatibility jamo.
    private(set) lazy var compatibilityMedialConvertTable: [Character: Character] =
        [Character("ᅡ"): Character("ㅏ"),
         Character("ᅢ"): Character("ㅐ"),
         Character("ᅣ"): Character("ㅑ"),
         Character("ᅤ"): Character("ㅒ"),
         Character("ᅥ"): Character("ㅓ"),
         Character("ᅦ"): Character("ㅔ"),
         Character("ᅧ"): Character("ㅕ"),
         Character("ᅨ"): Character("ㅖ"),
         Character("ᅩ"): Character("ㅗ"),
         Character("ᅪ"): Character("ㅘ"),
         Character("ᅫ"): Character("ㅙ"),
         Character("ᅬ"): Character("ㅚ"),
         Character("ᅭ"): Character("ㅛ"),
         Character("ᅮ"): Character("ㅜ"),
         Character("ᅯ"): Character("ㅝ"),
         Character("ᅰ"): Character("ㅞ"),
         Character("ᅱ"): Character("ㅟ"),
         Character("ᅲ"): Character("ㅠ"),
         Character("ᅳ"): Character("ㅡ"),
         Character("ᅴ"): Character("ㅢ"),
         Character("ᅵ"): Character("ㅣ")]

    /// Maps each conjoining final consonant to its compatibility jamo.
    private(set) lazy var compatibilityFinalConsonantConvertTable: [Character: Character] =
        [Character("ᆨ"): Character("ㄱ"),
         Character("ᆩ"): Character("ㄲ"),
         Character("ᆪ"): Character("ㄳ"),
         Character("ᆫ"): Character("ㄴ"),
         Character("ᆬ"): Character("ㄵ"),
         Character("ᆭ"): Character("ㄶ"),
         Character("ᆮ"): Character("ㄷ"),
         Character("ᆯ"): Character("ㄹ"),
         Character("ᆰ"): Character("ㄺ"),
         Character("ᆱ"): Character("ㄻ"),
         Character("ᆲ"): Character("ㄼ"),
         Character("ᆳ"): Character("ㄽ"),
         Character("ᆴ"): Character("ㄾ"),
         Character("ᆵ"): Character("ㄿ"),
         Character("ᆶ"): Character("ㅀ"),
         Character("ᆷ"): Character("ㅁ"),
         Character("ᆸ"): Character("ㅂ"),
         Character("ᆹ"): Character("ㅄ"),
         Character("ᆺ"): Character("ㅅ"),
         Character("ᆻ"): Character("ㅆ"),
         Character("ᆼ"): Character("ㅇ"),
         Character("ᆽ"): Character("ㅈ"),
         Character("ᆾ"): Character("ㅊ"),
         Character("ᆿ"): Character("ㅋ"),
         Character("ᇀ"): Character("ㅌ"),
         Character("ᇁ"): Character("ㅍ"),
         Character("ᇂ"): Character("ㅎ")]

    /// Whether `unicodeScalar` lies in the conjoining initial-consonant range.
    func isInitialConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return initialConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` lies in the conjoining medial-vowel range.
    func isMedial(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return medial.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` lies in the conjoining final-consonant range.
    func isFinalConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return finalConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` lies in any of the Hangul ranges declared above.
    ///
    /// Gives the same answer as `korean.contains(unicodeScalar)`, but with five
    /// constant-time range checks instead of a linear scan of ~11,000 scalars.
    func isKorean(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return initialConsonant.contains(unicodeScalar)
            || medial.contains(unicodeScalar)
            || finalConsonant.contains(unicodeScalar)
            || koreanCompatibilityJamo.contains(unicodeScalar)
            || koreanOfNFC.contains(unicodeScalar)
    }

    /// Whether every scalar that makes up `character` is Hangul.
    func isKorean(_ character: Character) -> Bool {
        return character.unicodeScalars.allSatisfy { isKorean($0) }
    }

    /// Returns `true` when the string contains only Hangul and `false` otherwise.
    ///
    /// The check runs on the compatibility-decomposed (NFKD) form. An empty
    /// string yields `true` because there is no scalar to reject.
    func isKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .allSatisfy { isKorean($0) }
    }

    /// Returns `true` when the string contains at least one Hangul scalar and
    /// `false` when it contains none. The check runs on the NFKD form.
    func hasKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .contains { isKorean($0) }
    }

    /// Converts a single conjoining jamo into its compatibility jamo.
    ///
    /// The initial, medial and final tables are consulted in that order;
    /// a character found in none of them is returned unchanged.
    func koreanCompatibilityJamoCharacterFromNFDCharacter(_ character: Character) -> Character {
        return compatibilityInitialConsonantConvertTable[character]
            ?? compatibilityMedialConvertTable[character]
            ?? compatibilityFinalConsonantConvertTable[character]
            ?? character
    }
}
/// Shared classifier instance backing the `String` helpers in the extension that follows.
let korean = KoreanUnicode()

extension String {
    /// `true` when the string consists only of Hangul, as decided by `KoreanUnicode.isKorean(_:)`.
    var isKorean: Bool {
        return korean.isKorean(self)
    }

    /// `true` when the string contains at least one Hangul scalar.
    var hasKorean: Bool {
        return korean.hasKorean(self)
    }

    /// The string with every non-Hangul character removed.
    var koreanOnly: String {
        return filter { korean.isKorean($0) }
    }

    /// The conjoining initial consonants found in the NFKD form, in order.
    var koreanInitialConsonantOnly: String {
        let initials = decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .filter { korean.isInitialConsonant($0) }
        return String(String.UnicodeScalarView(initials))
    }

    /// The scalars of the compatibility-decomposed (NFKD) form.
    func decomposedUnicodeScalars() -> [Unicode.Scalar] {
        return Array(decomposedStringWithCompatibilityMapping.unicodeScalars)
    }

    /// The compatibility-decomposed (NFKD) form of the string.
    func decomposed() -> String {
        // Rebuilding the string scalar by scalar, as before, yields exactly the
        // scalars of the NFKD string, so that string is returned directly.
        return decomposedStringWithCompatibilityMapping
    }

    /// The string with every conjoining jamo replaced by its compatibility jamo,
    /// e.g. a decomposed syllable becomes three standalone jamo.
    var toKoreanCompatiblityJamo: String {
        // Convert per scalar, not per Character: a decomposed syllable forms a
        // single grapheme cluster, which never matches the single-jamo tables.
        let converted = decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .map { korean.koreanCompatibilityJamoCharacterFromNFDCharacter(Character($0)) }
        return String(converted)
    }
}
// Demo: decompose a mixed-script sample and print it scalar by scalar.
let a: String = "안녕하세요.뿡뿡뿡helloおはよう宜しくね脳"
let b = a.decomposed()
do {
    // One single-scalar Character per Unicode scalar of each string.
    let originalPieces = a.unicodeScalars.map { Character($0) }
    let decomposedPieces = b.unicodeScalars.map { Character($0) }
    print(originalPieces)
    print(decomposedPieces)

    // String equality compares the original with its decomposed form.
    print(a == b)

    // Convert each decomposed jamo to its compatibility form and print the result.
    let compatibilityPieces = decomposedPieces.map { korean.koreanCompatibilityJamoCharacterFromNFDCharacter($0) }
    print(String(compatibilityPieces))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment