Skip to content

Instantly share code, notes, and snippets.

@colinjroberts
Created April 1, 2022 04:21
Show Gist options
  • Save colinjroberts/f3a4ca2b3b5525e7f2be3b88a44ab81c to your computer and use it in GitHub Desktop.
Save colinjroberts/f3a4ca2b3b5525e7f2be3b88a44ab81c to your computer and use it in GitHub Desktop.
korean-typing-practice-part3 - wordDecomposer.js
/**
 * Builds a line of typing-practice text: pseudo-words made only from the
 * letters the caller supplies, joined by single spaces.
 *
 * @param {number|string} countOfWordsToReturn - number of words to generate;
 *   parsed as a base-10 integer and must be greater than 0
 * @param {string} lettersProvided - letters the generated words may use
 * @param {string} language - 'EN' for English or 'KO' for Korean
 * @returns {string} the generated words separated by spaces, or a single
 *   space when lettersProvided is empty or only whitespace
 * @throws {TypeError} if the count is not an integer or the letters are not a string
 * @throws {RangeError} if the count is not positive or the language is unsupported
 */
function getText(countOfWordsToReturn, lettersProvided, language) {
  const maxWordLength = 6;
  const acceptedLanguages = new Set(['EN', 'KO']);

  // Validate argument types first so later checks can rely on them.
  const wordCount = Number.parseInt(countOfWordsToReturn, 10);
  if (Number.isNaN(wordCount) || typeof lettersProvided !== 'string') {
    throw new TypeError(
      `int count expected, ${countOfWordsToReturn} was provided; ` +
        `string letters expected, ${lettersProvided} was provided, ` +
        `string lang was expected, ${language} was provided.`
    );
  }
  if (wordCount <= 0) {
    throw new RangeError(
      `int count must be greater than 0: ${countOfWordsToReturn} was provided`
    );
  }
  if (!acceptedLanguages.has(language)) {
    // Spread the Set: interpolating it directly prints "[object Set]".
    throw new RangeError(
      `Language must be one of ${[...acceptedLanguages].join(', ')}: ${language} was provided`
    );
  }

  // Return only a space if only whitespace is provided
  if (lettersProvided.trim() === '') {
    return ' ';
  }

  // Delegate to the handler for the requested language. The language was
  // validated above, so exactly one of the two branches applies.
  const words =
    language === 'EN'
      ? handleEn(wordCount, lettersProvided, maxWordLength)
      : handleKo(wordCount, lettersProvided, maxWordLength);
  return words.join(' ');
}
/**
 * Generates English practice words: reduces the input to its unique ASCII
 * letters, then builds the requested number of words from them.
 *
 * @param {number} countOfWordsToReturn - number of words to generate
 * @param {string} lettersProvided - raw input; non-ASCII-letter characters are ignored
 * @param {number} MaxWordLength - upper bound on the length of each word
 * @returns {string[]} the generated words
 * @throws {RangeError} if the input contains no ASCII letters
 */
function handleEn(countOfWordsToReturn, lettersProvided, MaxWordLength) {
  // Deduplicate list of letters
  const uniqueLetters = decomposeWordsEn(lettersProvided);

  // decomposeWordsEn returns a Set, so emptiness is checked with `.size`
  // (`.length` is undefined on a Set and would never trigger the guard).
  if (uniqueLetters.size === 0) {
    throw new RangeError(
      `string letters expected at least 1 ascii letter, ${lettersProvided} was provided`
    );
  }

  // Create and return the number of requested words
  return createManyWordsEn(uniqueLetters, countOfWordsToReturn, MaxWordLength);
}
/**
 * Generates Korean practice words: keeps only the Korean characters of the
 * input, reduces them to a unique set of letters, then builds the requested
 * number of words from that set.
 *
 * @param {number} CountOfWordsToReturn - number of words to generate
 * @param {string} LettersProvided - raw input; non-Korean characters are ignored
 * @param {number} MaxWordLength - passed through to the word generator
 * @returns {string[]} the generated words
 * @throws {RangeError} if the input contains no Korean letters
 */
function handleKo(CountOfWordsToReturn, LettersProvided, MaxWordLength) {
  // Filter input to only Korean letters, grouped by kind of character
  const lettersByType = filterAndSortKoreanLetters(LettersProvided);

  // Throw an error if there are no Korean letters in any of the lists
  const hasKoreanLetter = [...lettersByType.values()].some(
    (letters) => letters.length > 0
  );
  if (!hasKoreanLetter) {
    throw new RangeError(
      `string letters expected at least 1 korean letter, ${LettersProvided} was provided.`
    );
  }

  // Create a deduplicated set of letters
  const uniqueLetters = decomposeWordsKo(lettersByType);

  // Create and return the number of requested words
  return createManyWordsKO(uniqueLetters, CountOfWordsToReturn, MaxWordLength);
}
/*
* Returns a decomposed array of letters in the order they appear in words.
* In this code, to decompose means both dividing Korean syllable blocks
* (one 'character'/unicode codepoint that is made up of 2-4 characters),
* into separate codepoints for each letter AND deduplicating
* English and Korean letters.
*
* @param {string} words to be decomposed/deduplicated
* @param {string} the human language of the words (EN for English or KO for Korean)
* @return {Set} the decomposed/deduplicated letters of the input
*/
// function decomposeWords(words, language){
// if (words.length <=0 || (language != 'EN' && language != 'KO')){
// return new TypeError('words must be of non-zero length and language must be one of EN or KO');
// }
// if (language == 'EN'){
// return decomposeWordsEn(words);
// } else if (language == 'KO'){
// return decomposeWordsKo(words);
// } else {
// throw(Error, "language must be one of EN or KO")
// }
// }
/**
 * Tells whether the first UTF-16 code unit of `letter` is an ASCII letter.
 *
 * @param {string} letter - character to test
 * @returns {boolean} true for A-Z (65-90) or a-z (97-122), false otherwise
 */
function characterIsInAsciiLetterRange(letter) {
  const code = ord(letter);
  const isUpperCase = code >= 65 && code <= 90;
  const isLowerCase = code >= 97 && code <= 122;
  return isUpperCase || isLowerCase;
}
/**
 * Collects the distinct ASCII letters of the input, lowercased.
 *
 * @param {string} stringOfLetters - text to scan; anything that is not an
 *   ASCII letter is dropped
 * @returns {Set<string>} the lowercase letters, in order of first appearance
 */
function decomposeWordsEn(stringOfLetters) {
  const lowercasedLetters = [...stringOfLetters]
    .filter((character) => characterIsInAsciiLetterRange(character))
    .map((character) => character.toLowerCase());
  return new Set(lowercasedLetters);
}
/**
 * Flattens the three groups of Korean characters into one deduplicated set.
 *
 * @param {Map<string, string[]>} mapOfKoreanLettersByType - map with the keys
 *   "compatibilityJamo", "jamo" and "syllable"
 * @returns {Set<string>} compatibility-jamo entries first, followed by what
 *   lookupJamoKo returns for each "jamo" entry and what decomposeKoSyllables
 *   returns for each "syllable" entry
 */
function decomposeWordsKo(mapOfKoreanLettersByType) {
  // Compatibility jamo need no conversion and go straight to the output.
  const alreadyCompatible = [].concat(mapOfKoreanLettersByType.get("compatibilityJamo"));

  // Each conjoining jamo is translated through the lookup table.
  const fromJamo = Array.from(mapOfKoreanLettersByType.get("jamo")).reduce(
    (collected, jamo) => collected.concat(lookupJamoKo(jamo)),
    []
  );

  // Each syllable block is broken apart into its letters.
  const fromSyllables = Array.from(mapOfKoreanLettersByType.get("syllable")).reduce(
    (collected, syllable) => collected.concat(decomposeKoSyllables(syllable)),
    []
  );

  // A Set removes duplicates and matches what the English path returns.
  return new Set([...alreadyCompatible, ...fromJamo, ...fromSyllables]);
}
/**
 * Breaks precomposed Hangul syllable blocks (U+AC00-U+D7A3) into their
 * letters and returns those letters as compatibility jamo, in the order
 * they appear.
 *
 * A syllable's code point is
 *   0xAC00 + (initial * 588) + (medial * 28) + terminal
 * where terminal 0 means "no final consonant", so the three indices are
 * recovered with integer division and remainder.
 *
 * @param {Iterable<string>} listOfFilteredInput - syllable blocks (an array
 *   of single characters, or a string); anything outside the syllable range
 *   is skipped
 * @returns {string[]} compatibility jamo for every syllable, concatenated
 */
function decomposeKoSyllables(listOfFilteredInput) {
  const SYLLABLE_BASE = 0xac00; // first precomposed syllable
  const SYLLABLE_COUNT = 11172; // 19 initials * 21 medials * 28 terminals
  const CODES_PER_INITIAL = 588; // 21 medials * 28 terminals
  const CODES_PER_MEDIAL = 28; // 27 terminals + "none"
  const INITIAL_BASE = 0x1100; // first conjoining initial consonant
  const MEDIAL_BASE = 0x1161; // first conjoining vowel
  const TERMINAL_BASE = 0x11a7; // one before the first conjoining terminal

  const output = [];
  for (const syllable of listOfFilteredInput) {
    const offset = ord(syllable) - SYLLABLE_BASE;
    if (!(offset >= 0 && offset < SYLLABLE_COUNT)) {
      continue; // not a precomposed syllable; nothing to decompose
    }

    // The initial index must be a plain floor division: adding 1 before
    // dividing pushes the last syllable of each group into the next initial.
    const initialIndex = Math.floor(offset / CODES_PER_INITIAL);
    const medialIndex = Math.floor((offset % CODES_PER_INITIAL) / CODES_PER_MEDIAL);
    const terminalIndex = offset % CODES_PER_MEDIAL;

    const conjoiningJamo = [
      chr(INITIAL_BASE + initialIndex),
      chr(MEDIAL_BASE + medialIndex),
    ];
    if (terminalIndex > 0) {
      conjoiningJamo.push(chr(TERMINAL_BASE + terminalIndex));
    }

    // Convert each conjoining jamo to its compatibility jamo (one or two
    // letters, since compound jamo are split).
    for (const jamo of conjoiningJamo) {
      output.push(...lookupJamoKo(jamo));
    }
  }
  return output;
}
/**
 * Keeps only the characters of the input that fall in a Korean Unicode
 * range and sorts them into three groups:
 *   jamo              - conjoining letters (U+1100-U+11FF) that combine to form syllables
 *   syllable          - one code point representing a whole syllable (U+AC00-U+D7A3)
 *   compatibilityJamo - letters usually used to display a letter by itself (U+3130-U+318F)
 *
 * @param {string} letterInput - text to filter
 * @returns {Map<string, string[]>} map with the keys "jamo", "syllable" and
 *   "compatibilityJamo"; each value lists the matching characters in input
 *   order (duplicates kept)
 */
function filterAndSortKoreanLetters(letterInput) {
  // [group name, first code point, last code point]
  const hangulRanges = [
    ["jamo", 0x1100, 0x11ff],
    ["syllable", 0xac00, 0xd7a3],
    ["compatibilityJamo", 0x3130, 0x318f],
  ];

  const lettersByType = new Map(hangulRanges.map(([type]) => [type, []]));

  for (const letter of letterInput) {
    const code = ord(letter);
    const matchingRange = hangulRanges.find(
      ([, first, last]) => code >= first && code <= last
    );
    if (matchingRange) {
      lettersByType.get(matchingRange[0]).push(letter);
    }
  }
  return lettersByType;
}
/**
 * Converts one conjoining Hangul jamo (initial, medial or terminal) into a
 * list of compatibility jamo.
 *
 * Lookup keys are Unicode Hangul Jamo (U+1100-U+11FF). Their values are lists
 * of Unicode Hangul Compatibility Jamo, which have only one representation
 * per letter. For example, the initial jamo U+1100 and the terminal jamo
 * U+11A8 both become the compatibility jamo "ㄱ" (U+3131). Compound vowels
 * and consonant clusters are split into their two component letters.
 *
 * Keys are written as \u escapes because conjoining and compatibility jamo
 * render identically in most fonts and are easy to mix up.
 *
 * N.B. To be more efficient, this table could be set as a constant and
 * referenced later.
 *
 * @param {string} letterBlock - a single conjoining jamo
 * @returns {string[]} one or two compatibility jamo, or an empty array when
 *   letterBlock is not a key of the table (no error is thrown)
 */
function lookupJamoKo(letterBlock) {
  const lookup = new Map([
    // Initial consonants, U+1100-U+1112
    ['\u1100', ["ㄱ"]],
    ['\u1101', ["ㄲ"]],
    ['\u1102', ["ㄴ"]],
    ['\u1103', ["ㄷ"]],
    ['\u1104', ["ㄸ"]],
    ['\u1105', ["ㄹ"]],
    ['\u1106', ["ㅁ"]],
    ['\u1107', ["ㅂ"]],
    ['\u1108', ["ㅃ"]],
    ['\u1109', ["ㅅ"]],
    ['\u110A', ["ㅆ"]],
    ['\u110B', ["ㅇ"]],
    ['\u110C', ["ㅈ"]],
    ['\u110D', ["ㅉ"]],
    ['\u110E', ["ㅊ"]],
    ['\u110F', ["ㅋ"]],
    ['\u1110', ["ㅌ"]],
    ['\u1111', ["ㅍ"]],
    ['\u1112', ["ㅎ"]],
    // Medial vowels, U+1161-U+1175
    ['\u1161', ["ㅏ"]],
    ['\u1162', ["ㅐ"]],
    ['\u1163', ["ㅑ"]],
    ['\u1164', ["ㅒ"]],
    ['\u1165', ["ㅓ"]],
    ['\u1166', ["ㅔ"]],
    ['\u1167', ["ㅕ"]],
    ['\u1168', ["ㅖ"]],
    ['\u1169', ["ㅗ"]],
    ['\u116A', ["ㅗ", "ㅏ"]],
    ['\u116B', ["ㅗ", "ㅐ"]],
    ['\u116C', ["ㅗ", "ㅣ"]],
    ['\u116D', ["ㅛ"]],
    ['\u116E', ["ㅜ"]],
    ['\u116F', ["ㅜ", "ㅓ"]],
    ['\u1170', ["ㅜ", "ㅔ"]],
    ['\u1171', ["ㅜ", "ㅣ"]],
    ['\u1172', ["ㅠ"]],
    ['\u1173', ["ㅡ"]],
    ['\u1174', ["ㅡ", "ㅣ"]],
    ['\u1175', ["ㅣ"]],
    // Terminal consonants, U+11A8-U+11C2
    ['\u11A8', ["ㄱ"]],
    ['\u11A9', ["ㄲ"]],
    ['\u11AA', ["ㄱ", "ㅅ"]],
    ['\u11AB', ["ㄴ"]],
    ['\u11AC', ["ㄴ", "ㅈ"]],
    ['\u11AD', ["ㄴ", "ㅎ"]],
    ['\u11AE', ["ㄷ"]],
    ['\u11AF', ["ㄹ"]],
    ['\u11B0', ["ㄹ", "ㄱ"]],
    ['\u11B1', ["ㄹ", "ㅁ"]],
    ['\u11B2', ["ㄹ", "ㅂ"]],
    ['\u11B3', ["ㄹ", "ㅅ"]],
    ['\u11B4', ["ㄹ", "ㅌ"]],
    ['\u11B5', ["ㄹ", "ㅍ"]],
    ['\u11B6', ["ㄹ", "ㅎ"]],
    ['\u11B7', ["ㅁ"]],
    ['\u11B8', ["ㅂ"]],
    ['\u11B9', ["ㅂ", "ㅅ"]],
    ['\u11BA', ["ㅅ"]],
    ['\u11BB', ["ㅆ"]],
    ['\u11BC', ["ㅇ"]],
    ['\u11BD', ["ㅈ"]],
    ['\u11BE', ["ㅊ"]],
    ['\u11BF', ["ㅋ"]],
    ['\u11C0', ["ㅌ"]],
    ['\u11C1', ["ㅍ"]],
    ['\u11C2', ["ㅎ"]],
  ]);

  return lookup.has(letterBlock) ? lookup.get(letterBlock) : [];
}
/**
 * Works out which initial, medial and terminal jamo can be formed from a set
 * of compatibility jamo, expressed as the numeric indices used when
 * composing a Hangul syllable code point.
 *
 * Single letters are looked up directly. Compound vowels and terminal
 * consonant clusters are added when both of their component letters are in
 * the set. The terminal list always starts with 0 ("no terminal") because a
 * syllable block may consist of just an initial and a medial.
 *
 * @param {Set<string>} setOfCompatibilityJamo - available compatibility jamo
 * @returns {{initialJamoList: number[], medialJamoList: number[], terminalJamoList: number[]}}
 *   index lists; single letters come first in set iteration order, followed
 *   by compounds in table order
 */
function createJamoLists(setOfCompatibilityJamo) {
  // Compatibility jamo -> index as an initial consonant
  const initialIndexByJamo = new Map([
    ['ㄱ', 0], ['ㄲ', 1], ['ㄴ', 2], ['ㄷ', 3], ['ㄸ', 4], ['ㄹ', 5], ['ㅁ', 6],
    ['ㅂ', 7], ['ㅃ', 8], ['ㅅ', 9], ['ㅆ', 10], ['ㅇ', 11], ['ㅈ', 12], ['ㅉ', 13],
    ['ㅊ', 14], ['ㅋ', 15], ['ㅌ', 16], ['ㅍ', 17], ['ㅎ', 18],
  ]);

  // Compatibility jamo -> index as a medial vowel (compound vowels fill the gaps)
  const medialIndexByJamo = new Map([
    ['ㅏ', 0], ['ㅐ', 1], ['ㅑ', 2], ['ㅒ', 3], ['ㅓ', 4], ['ㅔ', 5], ['ㅕ', 6],
    ['ㅖ', 7], ['ㅗ', 8], ['ㅛ', 12], ['ㅜ', 13], ['ㅠ', 17], ['ㅡ', 18], ['ㅣ', 20],
  ]);

  // Compatibility jamo -> index as a terminal consonant (clusters fill the gaps)
  const terminalIndexByJamo = new Map([
    ['ㄱ', 1], ['ㄲ', 2], ['ㄴ', 4], ['ㄷ', 7], ['ㄹ', 8], ['ㅁ', 16], ['ㅂ', 17],
    ['ㅅ', 19], ['ㅆ', 20], ['ㅇ', 21], ['ㅈ', 22], ['ㅊ', 23], ['ㅋ', 24],
    ['ㅌ', 25], ['ㅍ', 26], ['ㅎ', 27],
  ]);

  // [first component, second component, medial index of the compound vowel]
  const compositeMedials = [
    ['ㅗ', 'ㅏ', 9], ['ㅗ', 'ㅐ', 10], ['ㅗ', 'ㅣ', 11],
    ['ㅜ', 'ㅓ', 14], ['ㅜ', 'ㅔ', 15], ['ㅜ', 'ㅣ', 16],
    ['ㅡ', 'ㅣ', 19],
  ];

  // [first component, second component, terminal index of the cluster]
  const compositeTerminals = [
    ['ㄱ', 'ㅅ', 3],
    ['ㄴ', 'ㅈ', 5], ['ㄴ', 'ㅎ', 6],
    ['ㄹ', 'ㄱ', 9], ['ㄹ', 'ㅁ', 10], ['ㄹ', 'ㅂ', 11], ['ㄹ', 'ㅅ', 12],
    ['ㄹ', 'ㅌ', 13], ['ㄹ', 'ㅍ', 14], ['ㄹ', 'ㅎ', 15],
    ['ㅂ', 'ㅅ', 18],
  ];

  const initialJamoList = [];
  const medialJamoList = [];
  const terminalJamoList = [0]; // 0 = syllable without a terminal consonant

  // Single letters: a consonant can qualify as both an initial and a terminal.
  for (const jamo of setOfCompatibilityJamo) {
    if (initialIndexByJamo.has(jamo)) {
      initialJamoList.push(initialIndexByJamo.get(jamo));
    }
    if (medialIndexByJamo.has(jamo)) {
      medialJamoList.push(medialIndexByJamo.get(jamo));
    }
    if (terminalIndexByJamo.has(jamo)) {
      terminalJamoList.push(terminalIndexByJamo.get(jamo));
    }
  }

  // Compounds: available only when both component letters were provided.
  for (const [first, second, index] of compositeMedials) {
    if (setOfCompatibilityJamo.has(first) && setOfCompatibilityJamo.has(second)) {
      medialJamoList.push(index);
    }
  }
  for (const [first, second, index] of compositeTerminals) {
    if (setOfCompatibilityJamo.has(first) && setOfCompatibilityJamo.has(second)) {
      terminalJamoList.push(index);
    }
  }

  return { initialJamoList, medialJamoList, terminalJamoList };
}
/**
 * Returns the one-character string for a UTF-16 code unit value.
 *
 * @param {number} num - code unit value
 * @returns {string} result of String.fromCharCode for that value
 */
function chr(num) {
  const character = String.fromCharCode(num);
  return character;
}
/**
 * Returns the UTF-16 code unit value of the first character of a string.
 *
 * @param {string} str - string to inspect
 * @returns {number} code unit at index 0, or NaN for an empty string
 */
function ord(str) {
  const firstCodeUnit = str.charCodeAt(0);
  return firstCodeUnit;
}
/**
 * Picks a random integer between min and max, both ends included.
 *
 * @param {number} min - lowest value that can be returned
 * @param {number} max - highest value that can be returned
 * @returns {number} a Math.random-based integer in [min, max]
 */
function randIntInclusiveBetween(min, max) {
  const numberOfChoices = max - min + 1;
  const scaled = Math.random() * numberOfChoices + min;
  return Math.floor(scaled);
}
/**
 * Builds one pseudo-word by drawing letters at random, with repetition.
 *
 * @param {string[]} listOfLetters - letters to draw from
 * @param {number} wordLength - number of letters in the word
 * @returns {string} the generated word
 */
function createOneWordEn(listOfLetters, wordLength) {
  const lettersForNewWord = [];
  for (let i = 0; i < wordLength; i++) {
    // Locals are declared with const so nothing leaks into the global scope.
    const index = randIntInclusiveBetween(0, listOfLetters.length - 1);
    lettersForNewWord.push(listOfLetters[index]);
  }
  return lettersForNewWord.join('');
}
/**
 * Builds a list of English pseudo-words from a set of letters.
 *
 * @param {Set<string>} SetOfLetters - letters the words may contain
 * @param {number} NumberOfWords - how many words to generate
 * @param {number} MaxWordLength - longest allowed word
 * @param {string} [lengthType='random'] - 'random' gives each word a random
 *   length from 1 to MaxWordLength; any other value makes every word exactly
 *   MaxWordLength letters long
 * @returns {string[]} the generated words
 */
function createManyWordsEn(SetOfLetters, NumberOfWords, MaxWordLength, lengthType = 'random') {
  // Convert once: the letters do not change between words.
  const letters = Array.from(SetOfLetters);
  const output = [];
  for (let i = 0; i < NumberOfWords; i++) {
    const wordLength =
      lengthType === 'random'
        ? randIntInclusiveBetween(1, MaxWordLength)
        : MaxWordLength;
    output.push(createOneWordEn(letters, wordLength));
  }
  return output;
}
/**
 * Builds one Korean pseudo-word out of randomly composed syllable blocks.
 *
 * Each syllable's code point is computed as
 *   0xAC00 (44032) + initial * 588 + medial * 28 + terminal
 * using one randomly chosen index from each list.
 *
 * @param {number[]} initialList - indices of usable initial consonants
 * @param {number[]} medialList - indices of usable vowels
 * @param {number[]} terminalList - indices of usable terminal consonants
 *   (0 means no terminal)
 * @param {number} numberOfSyllables - number of syllable blocks in the word
 * @returns {string} the generated word
 * @throws {RangeError} if any list is empty; a syllable needs one entry from
 *   each, and composing without one would yield U+0000 characters
 */
function createOneWordKo(initialList, medialList, terminalList, numberOfSyllables) {
  if (initialList.length === 0 || medialList.length === 0 || terminalList.length === 0) {
    throw new RangeError(
      'at least one initial consonant, one vowel and one terminal option ' +
        'are required to build a Korean syllable'
    );
  }

  const pickFrom = (list) => list[randIntInclusiveBetween(0, list.length - 1)];

  const syllableBlocks = [];
  // `let` keeps the loop counter local instead of leaking a global `i`.
  for (let i = 0; i < numberOfSyllables; i++) {
    const initialIndex = pickFrom(initialList);
    const medialIndex = pickFrom(medialList);
    const terminalIndex = pickFrom(terminalList);

    // Calculate Unicode for syllable block
    const syllableCode = initialIndex * 588 + medialIndex * 28 + terminalIndex + 44032;
    syllableBlocks.push(chr(syllableCode));
  }
  return syllableBlocks.join('');
}
/**
 * Builds a list of Korean pseudo-words from a set of compatibility jamo.
 *
 * NOTE(review): unlike createManyWordsEn, `lengthType` has no default here,
 * so callers that omit it always get words of exactly MaxWordLength
 * syllables. Left as-is to preserve behaviour; confirm whether 'random' was
 * intended as the default.
 *
 * @param {Set<string>} setOfCompatibilityJamo - letters the words may use
 * @param {number} NumberOfWords - how many words to generate
 * @param {number} MaxWordLength - longest allowed word, in syllable blocks
 * @param {string} [lengthType] - 'random' gives each word a random length
 *   from 1 to MaxWordLength; any other value (including omitted) makes every
 *   word exactly MaxWordLength syllables long
 * @returns {string[]} the generated words
 */
function createManyWordsKO(setOfCompatibilityJamo, NumberOfWords, MaxWordLength, lengthType) {
  // Lookup lists of Unicode refs for initial, medial, and terminal
  const { initialJamoList, medialJamoList, terminalJamoList } =
    createJamoLists(setOfCompatibilityJamo);

  const output = [];
  for (let i = 0; i < NumberOfWords; i++) {
    const wordLength =
      lengthType === 'random'
        ? randIntInclusiveBetween(1, MaxWordLength)
        : MaxWordLength;
    output.push(
      createOneWordKo(initialJamoList, medialJamoList, terminalJamoList, wordLength)
    );
  }
  return output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment