-
-
Save colinjroberts/f3a4ca2b3b5525e7f2be3b88a44ab81c to your computer and use it in GitHub Desktop.
korean-typing-practice-part3 - wordDecomposer.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generates practice text: a string of random "words" built only from the
 * letters found in the provided input.
 *
 * @param {number|string} countOfWordsToReturn - number of words to generate;
 *   parsed as a base-10 integer and must be greater than 0
 * @param {string} lettersProvided - text whose letters are used to build words
 * @param {string} language - 'EN' (English) or 'KO' (Korean)
 * @returns {string} the generated words joined by single spaces, or a single
 *   space when lettersProvided is empty or contains only whitespace
 * @throws {TypeError} if the count cannot be parsed as an integer, or if
 *   lettersProvided or language is not a string
 * @throws {RangeError} if the count is not greater than 0 or the language is
 *   not supported
 */
function getText(countOfWordsToReturn, lettersProvided, language) {
  const maxWordLength = 6;
  const acceptedLanguages = ['EN', 'KO'];

  // Validate the types of all three inputs up front
  const count = Number.parseInt(countOfWordsToReturn, 10);
  if (
    Number.isNaN(count) ||
    typeof lettersProvided !== 'string' ||
    typeof language !== 'string'
  ) {
    const problem = [
      `int count expected, ${countOfWordsToReturn} was provided;`,
      `string letters expected, ${lettersProvided} was provided,`,
      `string lang was expected, ${language} was provided.`,
    ].join(' ');
    throw new TypeError(problem);
  }
  if (count <= 0) {
    throw new RangeError(
      `int count must be greater than 0: ${countOfWordsToReturn} was provided`
    );
  }
  if (!acceptedLanguages.includes(language)) {
    throw new RangeError(
      `Language must be one of ${acceptedLanguages.join(', ')}: ${language} was provided`
    );
  }

  // Return only a space if nothing but whitespace is provided
  if (lettersProvided.trim() === '') {
    return ' ';
  }

  // Build the words with the handler for the requested language
  const words =
    language === 'EN'
      ? handleEn(count, lettersProvided, maxWordLength)
      : handleKo(count, lettersProvided, maxWordLength);
  return words.join(' ');
}
/**
 * Filters and deduplicates the input letters, then generates English
 * practice words from them.
 *
 * @param {number} countOfWordsToReturn - number of words to generate
 * @param {string} lettersProvided - text to take ASCII letters from
 * @param {number} MaxWordLength - maximum number of letters per word
 * @returns {string[]} the generated words
 * @throws {RangeError} if the input contains no ASCII letters
 */
function handleEn(countOfWordsToReturn, lettersProvided, MaxWordLength) {
  // Deduplicated set of the usable letters
  const setOfLetters = decomposeWordsEn(lettersProvided);

  // A Set reports its element count through .size (it has no .length)
  if (setOfLetters.size === 0) {
    throw new RangeError(
      `string letters expected at least 1 ascii letter, ${lettersProvided} was provided`
    );
  }

  // Create and return the number of requested words
  return createManyWordsEn(setOfLetters, countOfWordsToReturn, MaxWordLength);
}
/**
 * Filters the input down to Korean characters, decomposes them into a
 * deduplicated set of letters, then generates Korean practice words.
 *
 * @param {number} CountOfWordsToReturn - number of words to generate
 * @param {string} LettersProvided - text to take Korean letters from
 * @param {number} MaxWordLength - maximum number of syllable blocks per word
 * @returns {string[]} the generated words
 * @throws {RangeError} if the input yields no usable Korean letters
 */
function handleKo(CountOfWordsToReturn, LettersProvided, MaxWordLength) {
  // Sort the Korean characters of the input into jamo / syllable / compatibilityJamo
  const lettersByType = filterAndSortKoreanLetters(LettersProvided);
  const hasKoreanInput = [...lettersByType.values()].some(
    (letters) => letters.length > 0
  );

  // Create a deduplicated set of letters (skipped when nothing Korean was found)
  const setOfLetters = hasKoreanInput
    ? decomposeWordsKo(lettersByType)
    : new Set();

  // Also covers Korean-range characters that decompose to nothing usable
  if (setOfLetters.size === 0) {
    throw new RangeError(
      `string letters expected at least 1 korean letter, ${LettersProvided} was provided.`
    );
  }

  // Create and return the number of requested words
  return createManyWordsKO(setOfLetters, CountOfWordsToReturn, MaxWordLength);
}
/* | |
* Returns the decomposed letters of the given words in the order they appear.
* In this code, to decompose means both dividing Korean syllable blocks
* (one 'character'/Unicode code point that is made up of 2-4 letters)
* into separate code points for each letter AND deduplicating the
* English or Korean letters.
*
* @param {string} words to be decomposed/deduplicated
* @param {string} the human language of the words (EN for English or KO for Korean)
* @return {Set} the decomposed/deduplicated letters of the input
*/ | |
// function decomposeWords(words, language){ | |
// if (words.length <=0 || (language != 'EN' && language != 'KO')){ | |
// return new TypeError('words must be of non-zero length and language must be one of EN or KO'); | |
// } | |
// if (language == 'EN'){ | |
// return decomposeWordsEn(words); | |
// } else if (language == 'KO'){ | |
// return decomposeWordsKo(words); | |
// } else { | |
// throw(Error, "language must be one of EN or KO") | |
// } | |
// } | |
/**
 * Reports whether the first character of `letter` is an ASCII letter
 * (A-Z or a-z).
 *
 * @param {string} letter - the character to test; only its first UTF-16 code
 *   unit is examined
 * @returns {boolean} true for A-Z / a-z, false for anything else, including
 *   an empty string or a non-string value
 */
function characterIsInAsciiLetterRange(letter) {
  if (typeof letter !== 'string' || letter.length === 0) {
    return false;
  }
  const code = letter.charCodeAt(0);
  const isUpperCase = code >= 65 && code <= 90; // 'A'..'Z'
  const isLowerCase = code >= 97 && code <= 122; // 'a'..'z'
  return isUpperCase || isLowerCase;
}
/**
 * Collects the distinct ASCII letters of a string, lower-cased.
 *
 * @param {string} stringOfLetters - text to take letters from; characters
 *   that are not ASCII letters are ignored
 * @returns {Set<string>} the lower-cased letters, in order of first appearance
 */
function decomposeWordsEn(stringOfLetters) {
  const setOfLetters = new Set();
  for (const char of stringOfLetters) {
    if (characterIsInAsciiLetterRange(char)) {
      // Lower-casing before insertion makes 'A' and 'a' count as one letter
      setOfLetters.add(char.toLowerCase());
    }
  }
  return setOfLetters;
}
/**
 * Reduces sorted Korean input to a deduplicated set of single-letter
 * compatibility jamo.
 *
 * @param {Map<string, string[]>} mapOfKoreanLettersByType - map with the keys
 *   "compatibilityJamo", "jamo" and "syllable", each holding a list of
 *   characters of that type
 * @returns {Set<string>} the distinct compatibility jamo, in order of first
 *   appearance (compatibility jamo first, then jamo, then syllables)
 */
function decomposeWordsKo(mapOfKoreanLettersByType) {
  // Compatibility jamo that are made of two letters. They are split so that
  // they are treated the same way as their conjoining-jamo equivalents.
  const compositeCompatibilityJamo = new Map([
    ['\u3133', ['\u3131', '\u3145']], // ㄳ -> ㄱ ㅅ
    ['\u3135', ['\u3134', '\u3148']], // ㄵ -> ㄴ ㅈ
    ['\u3136', ['\u3134', '\u314E']], // ㄶ -> ㄴ ㅎ
    ['\u313A', ['\u3139', '\u3131']], // ㄺ -> ㄹ ㄱ
    ['\u313B', ['\u3139', '\u3141']], // ㄻ -> ㄹ ㅁ
    ['\u313C', ['\u3139', '\u3142']], // ㄼ -> ㄹ ㅂ
    ['\u313D', ['\u3139', '\u3145']], // ㄽ -> ㄹ ㅅ
    ['\u313E', ['\u3139', '\u314C']], // ㄾ -> ㄹ ㅌ
    ['\u313F', ['\u3139', '\u314D']], // ㄿ -> ㄹ ㅍ
    ['\u3140', ['\u3139', '\u314E']], // ㅀ -> ㄹ ㅎ
    ['\u3144', ['\u3142', '\u3145']], // ㅄ -> ㅂ ㅅ
    ['\u3158', ['\u3157', '\u314F']], // ㅘ -> ㅗ ㅏ
    ['\u3159', ['\u3157', '\u3150']], // ㅙ -> ㅗ ㅐ
    ['\u315A', ['\u3157', '\u3163']], // ㅚ -> ㅗ ㅣ
    ['\u315D', ['\u315C', '\u3153']], // ㅝ -> ㅜ ㅓ
    ['\u315E', ['\u315C', '\u3154']], // ㅞ -> ㅜ ㅔ
    ['\u315F', ['\u315C', '\u3163']], // ㅟ -> ㅜ ㅣ
    ['\u3162', ['\u3161', '\u3163']], // ㅢ -> ㅡ ㅣ
  ]);

  const setOfCompatibilityJamo = new Set();
  const addAll = (letters) => {
    for (const letter of letters) {
      setOfCompatibilityJamo.add(letter);
    }
  };

  // Single-letter compatibility jamo are added as they are
  for (const jamo of mapOfKoreanLettersByType.get('compatibilityJamo')) {
    addAll(compositeCompatibilityJamo.get(jamo) || [jamo]);
  }

  // Initial, medial and terminal jamo are converted through the lookup table
  for (const jamo of mapOfKoreanLettersByType.get('jamo')) {
    addAll(lookupJamoKo(jamo));
  }

  // Syllable characters need to be decomposed
  for (const syllable of mapOfKoreanLettersByType.get('syllable')) {
    addAll(decomposeKoSyllables(syllable));
  }

  return setOfCompatibilityJamo;
}
/**
 * Decomposes precomposed Hangul syllable blocks (U+AC00–U+D7A3) into
 * compatibility jamo.
 *
 * @param {Iterable<string>} listOfFilteredInput - syllable characters, either
 *   as a string or as a list of one-character strings; anything outside the
 *   syllable range is skipped
 * @returns {string[]} compatibility jamo in the order they appear (initial,
 *   medial, then terminal for each syllable)
 */
function decomposeKoSyllables(listOfFilteredInput) {
  const FIRST_SYLLABLE = 0xac00; // 44032, '가'
  const LAST_SYLLABLE = 0xd7a3; // '힣'
  const INITIAL_BASE = 0x1100; // first initial jamo
  const MEDIAL_BASE = 0x1161; // first medial jamo
  const TERMINAL_BASE = 0x11a7; // one before the first terminal jamo (index 0 = no terminal)
  const TERMINAL_COUNT = 28; // 27 terminals plus "none"
  const MEDIAL_COUNT = 21;
  const SYLLABLES_PER_INITIAL = MEDIAL_COUNT * TERMINAL_COUNT; // 588

  const output = [];
  for (const item of listOfFilteredInput) {
    const code = item.charCodeAt(0);
    if (code < FIRST_SYLLABLE || code > LAST_SYLLABLE) {
      continue;
    }

    // Position of each jamo within its own list. The offset is divided as-is
    // (no + 1): offsets 587, 1175, ... still belong to the previous initial.
    const offset = code - FIRST_SYLLABLE;
    const terminalIndex = offset % TERMINAL_COUNT;
    const medialIndex = Math.floor(offset / TERMINAL_COUNT) % MEDIAL_COUNT;
    const initialIndex = Math.floor(offset / SYLLABLES_PER_INITIAL);

    const jamo = [
      String.fromCharCode(INITIAL_BASE + initialIndex),
      String.fromCharCode(MEDIAL_BASE + medialIndex),
    ];
    if (terminalIndex > 0) {
      jamo.push(String.fromCharCode(TERMINAL_BASE + terminalIndex));
    }

    // Convert each conjoining jamo to its compatibility jamo
    for (const j of jamo) {
      output.push(...lookupJamoKo(j));
    }
  }
  return output;
}
/**
 * Filters a string down to the characters within the Korean Unicode ranges
 * and sorts them into three groups:
 *   jamo              - composable characters for Korean letters that are
 *                       used to form words (U+1100–U+11FF)
 *   syllable          - a single character that represents a syllable made
 *                       of 2-4 jamo (U+AC00–U+D7A3)
 *   compatibilityJamo - characters for Korean letters that are usually used
 *                       to display a letter by itself (U+3130–U+318F)
 *
 * @param {string} letterInput - text to filter
 * @returns {Map<string, string[]>} map with the keys "jamo", "syllable" and
 *   "compatibilityJamo"; each list keeps the input order
 */
function filterAndSortKoreanLetters(letterInput) {
  // [group name, first code point, last code point]
  const hangulRanges = [
    ['jamo', 0x1100, 0x11ff],
    ['syllable', 0xac00, 0xd7a3],
    ['compatibilityJamo', 0x3130, 0x318f],
  ];

  const mapOfKoreanLettersByType = new Map(
    hangulRanges.map(([type]) => [type, []])
  );

  for (const item of letterInput) {
    const code = item.charCodeAt(0);
    const range = hangulRanges.find(
      ([, first, last]) => code >= first && code <= last
    );
    if (range) {
      mapOfKoreanLettersByType.get(range[0]).push(item);
    }
  }
  return mapOfKoreanLettersByType;
}
/**
 * Converts one conjoining Hangul jamo into a list of single-letter
 * compatibility jamo.
 *
 * Lookup keys are Unicode Hangul Jamo (U+1100–U+11FF). Their values are lists
 * of Unicode Hangul Compatibility Jamo, which have only one representation
 * per letter. For example, the initial jamo U+1100 and the terminal jamo
 * U+11A8 both become the compatibility jamo U+3131. Jamo made of two letters
 * become a list of two compatibility jamo.
 *
 * The table is written with escape sequences because conjoining and
 * compatibility jamo look identical in most fonts; the trailing comments show
 * the characters.
 *
 * N.B. To be more efficient, this table could be set as a constant and
 * referenced later.
 *
 * @param {string} letterBlock - a single conjoining jamo character
 * @returns {string[]} the matching compatibility jamo, or an empty list when
 *   the character is not in the table
 */
function lookupJamoKo(letterBlock) {
  const lookup = new Map([
    // Initial consonants, U+1100–U+1112
    ['\u1100', ['\u3131']], // ᄀ -> ㄱ
    ['\u1101', ['\u3132']], // ᄁ -> ㄲ
    ['\u1102', ['\u3134']], // ᄂ -> ㄴ
    ['\u1103', ['\u3137']], // ᄃ -> ㄷ
    ['\u1104', ['\u3138']], // ᄄ -> ㄸ
    ['\u1105', ['\u3139']], // ᄅ -> ㄹ
    ['\u1106', ['\u3141']], // ᄆ -> ㅁ
    ['\u1107', ['\u3142']], // ᄇ -> ㅂ
    ['\u1108', ['\u3143']], // ᄈ -> ㅃ
    ['\u1109', ['\u3145']], // ᄉ -> ㅅ
    ['\u110A', ['\u3146']], // ᄊ -> ㅆ
    ['\u110B', ['\u3147']], // ᄋ -> ㅇ
    ['\u110C', ['\u3148']], // ᄌ -> ㅈ
    ['\u110D', ['\u3149']], // ᄍ -> ㅉ
    ['\u110E', ['\u314A']], // ᄎ -> ㅊ
    ['\u110F', ['\u314B']], // ᄏ -> ㅋ
    ['\u1110', ['\u314C']], // ᄐ -> ㅌ
    ['\u1111', ['\u314D']], // ᄑ -> ㅍ
    ['\u1112', ['\u314E']], // ᄒ -> ㅎ
    // Medial vowels, U+1161–U+1175
    ['\u1161', ['\u314F']], // ᅡ -> ㅏ
    ['\u1162', ['\u3150']], // ᅢ -> ㅐ
    ['\u1163', ['\u3151']], // ᅣ -> ㅑ
    ['\u1164', ['\u3152']], // ᅤ -> ㅒ
    ['\u1165', ['\u3153']], // ᅥ -> ㅓ
    ['\u1166', ['\u3154']], // ᅦ -> ㅔ
    ['\u1167', ['\u3155']], // ᅧ -> ㅕ
    ['\u1168', ['\u3156']], // ᅨ -> ㅖ
    ['\u1169', ['\u3157']], // ᅩ -> ㅗ
    ['\u116A', ['\u3157', '\u314F']], // ᅪ -> ㅗ ㅏ
    ['\u116B', ['\u3157', '\u3150']], // ᅫ -> ㅗ ㅐ
    ['\u116C', ['\u3157', '\u3163']], // ᅬ -> ㅗ ㅣ
    ['\u116D', ['\u315B']], // ᅭ -> ㅛ
    ['\u116E', ['\u315C']], // ᅮ -> ㅜ
    ['\u116F', ['\u315C', '\u3153']], // ᅯ -> ㅜ ㅓ
    ['\u1170', ['\u315C', '\u3154']], // ᅰ -> ㅜ ㅔ
    ['\u1171', ['\u315C', '\u3163']], // ᅱ -> ㅜ ㅣ
    ['\u1172', ['\u3160']], // ᅲ -> ㅠ
    ['\u1173', ['\u3161']], // ᅳ -> ㅡ
    ['\u1174', ['\u3161', '\u3163']], // ᅴ -> ㅡ ㅣ
    ['\u1175', ['\u3163']], // ᅵ -> ㅣ
    // Terminal consonants, U+11A8–U+11C2
    ['\u11A8', ['\u3131']], // ᆨ -> ㄱ
    ['\u11A9', ['\u3132']], // ᆩ -> ㄲ
    ['\u11AA', ['\u3131', '\u3145']], // ᆪ -> ㄱ ㅅ
    ['\u11AB', ['\u3134']], // ᆫ -> ㄴ
    ['\u11AC', ['\u3134', '\u3148']], // ᆬ -> ㄴ ㅈ
    ['\u11AD', ['\u3134', '\u314E']], // ᆭ -> ㄴ ㅎ
    ['\u11AE', ['\u3137']], // ᆮ -> ㄷ
    ['\u11AF', ['\u3139']], // ᆯ -> ㄹ
    ['\u11B0', ['\u3139', '\u3131']], // ᆰ -> ㄹ ㄱ
    ['\u11B1', ['\u3139', '\u3141']], // ᆱ -> ㄹ ㅁ
    ['\u11B2', ['\u3139', '\u3142']], // ᆲ -> ㄹ ㅂ
    ['\u11B3', ['\u3139', '\u3145']], // ᆳ -> ㄹ ㅅ
    ['\u11B4', ['\u3139', '\u314C']], // ᆴ -> ㄹ ㅌ
    ['\u11B5', ['\u3139', '\u314D']], // ᆵ -> ㄹ ㅍ
    ['\u11B6', ['\u3139', '\u314E']], // ᆶ -> ㄹ ㅎ
    ['\u11B7', ['\u3141']], // ᆷ -> ㅁ
    ['\u11B8', ['\u3142']], // ᆸ -> ㅂ
    ['\u11B9', ['\u3142', '\u3145']], // ᆹ -> ㅂ ㅅ
    ['\u11BA', ['\u3145']], // ᆺ -> ㅅ
    ['\u11BB', ['\u3146']], // ᆻ -> ㅆ
    ['\u11BC', ['\u3147']], // ᆼ -> ㅇ
    ['\u11BD', ['\u3148']], // ᆽ -> ㅈ
    ['\u11BE', ['\u314A']], // ᆾ -> ㅊ
    ['\u11BF', ['\u314B']], // ᆿ -> ㅋ
    ['\u11C0', ['\u314C']], // ᇀ -> ㅌ
    ['\u11C1', ['\u314D']], // ᇁ -> ㅍ
    ['\u11C2', ['\u314E']], // ᇂ -> ㅎ
  ]);

  return lookup.get(letterBlock) || [];
}
/**
 * Works out which initial, medial and terminal jamo can be built from a set
 * of single-letter compatibility jamo.
 *
 * Every jamo is reported as its index within the initial (0-18), medial
 * (0-20) or terminal (0-27) list used by the Hangul syllable formula.
 * Two-letter jamo (such as ㅘ or ㄳ) are included only when both of their
 * letters are in the set.
 *
 * @param {Set<string>} setOfCompatibilityJamo - the letters available
 * @returns {{initialJamoList: number[], medialJamoList: number[], terminalJamoList: number[]}}
 *   the usable indices; terminalJamoList always starts with 0 ("no terminal")
 */
function createJamoLists(setOfCompatibilityJamo) {
  // Compatibility jamo -> index of the matching initial jamo
  const initialCharMap = new Map([
    ['ㄱ', 0], ['ㄲ', 1], ['ㄴ', 2], ['ㄷ', 3], ['ㄸ', 4],
    ['ㄹ', 5], ['ㅁ', 6], ['ㅂ', 7], ['ㅃ', 8], ['ㅅ', 9],
    ['ㅆ', 10], ['ㅇ', 11], ['ㅈ', 12], ['ㅉ', 13], ['ㅊ', 14],
    ['ㅋ', 15], ['ㅌ', 16], ['ㅍ', 17], ['ㅎ', 18],
  ]);

  // Compatibility jamo -> index of the matching medial jamo
  // (the gaps belong to the two-letter vowels listed further down)
  const medialCharMap = new Map([
    ['ㅏ', 0], ['ㅐ', 1], ['ㅑ', 2], ['ㅒ', 3], ['ㅓ', 4],
    ['ㅔ', 5], ['ㅕ', 6], ['ㅖ', 7], ['ㅗ', 8], ['ㅛ', 12],
    ['ㅜ', 13], ['ㅠ', 17], ['ㅡ', 18], ['ㅣ', 20],
  ]);

  // Compatibility jamo -> index of the matching terminal jamo
  // (the gaps belong to the two-letter consonants listed further down)
  const terminalCharMap = new Map([
    ['ㄱ', 1], ['ㄲ', 2], ['ㄴ', 4], ['ㄷ', 7], ['ㄹ', 8],
    ['ㅁ', 16], ['ㅂ', 17], ['ㅅ', 19], ['ㅆ', 20], ['ㅇ', 21],
    ['ㅈ', 22], ['ㅊ', 23], ['ㅋ', 24], ['ㅌ', 25], ['ㅍ', 26],
    ['ㅎ', 27],
  ]);

  // [first letter, second letter, index] of the two-letter medial jamo
  const compositeMedials = [
    ['ㅗ', 'ㅏ', 9], // ㅘ
    ['ㅗ', 'ㅐ', 10], // ㅙ
    ['ㅗ', 'ㅣ', 11], // ㅚ
    ['ㅜ', 'ㅓ', 14], // ㅝ
    ['ㅜ', 'ㅔ', 15], // ㅞ
    ['ㅜ', 'ㅣ', 16], // ㅟ
    ['ㅡ', 'ㅣ', 19], // ㅢ
  ];

  // [first letter, second letter, index] of the two-letter terminal jamo
  const compositeTerminals = [
    ['ㄱ', 'ㅅ', 3], // ㄳ
    ['ㄴ', 'ㅈ', 5], // ㄵ
    ['ㄴ', 'ㅎ', 6], // ㄶ
    ['ㄹ', 'ㄱ', 9], // ㄺ
    ['ㄹ', 'ㅁ', 10], // ㄻ
    ['ㄹ', 'ㅂ', 11], // ㄼ
    ['ㄹ', 'ㅅ', 12], // ㄽ
    ['ㄹ', 'ㅌ', 13], // ㄾ
    ['ㄹ', 'ㅍ', 14], // ㄿ
    ['ㄹ', 'ㅎ', 15], // ㅀ
    ['ㅂ', 'ㅅ', 18], // ㅄ
  ];

  const initialJamoList = [];
  const medialJamoList = [];
  // Syllable blocks can be only two letters long, so a 0 option is needed for none
  const terminalJamoList = [0];

  // Single letters: a consonant can be both an initial and a terminal
  for (const item of setOfCompatibilityJamo) {
    if (initialCharMap.has(item)) {
      initialJamoList.push(initialCharMap.get(item));
    }
    if (medialCharMap.has(item)) {
      medialJamoList.push(medialCharMap.get(item));
    }
    if (terminalCharMap.has(item)) {
      terminalJamoList.push(terminalCharMap.get(item));
    }
  }

  // Two-letter jamo are available only when both letters are available
  const hasBoth = (first, second) =>
    setOfCompatibilityJamo.has(first) && setOfCompatibilityJamo.has(second);
  for (const [first, second, index] of compositeMedials) {
    if (hasBoth(first, second)) {
      medialJamoList.push(index);
    }
  }
  for (const [first, second, index] of compositeTerminals) {
    if (hasBoth(first, second)) {
      terminalJamoList.push(index);
    }
  }

  return { initialJamoList, medialJamoList, terminalJamoList };
}
/**
 * Returns the character for a Unicode number (counterpart of ord).
 *
 * @param {number} num - the character's code
 * @returns {string} the character; integers above U+FFFF (up to U+10FFFF)
 *   are returned as a surrogate pair, every other value is handled by
 *   String.fromCharCode
 */
function chr(num) {
  if (Number.isInteger(num) && num > 0xffff && num <= 0x10ffff) {
    return String.fromCodePoint(num);
  }
  return String.fromCharCode(num);
}
/**
 * Returns the Unicode code point of the first character of a string
 * (counterpart of chr).
 *
 * @param {string} str - the string to read
 * @returns {number} the code point of the first character, or NaN for an
 *   empty string
 */
function ord(str) {
  const codePoint = str.codePointAt(0);
  // codePointAt returns undefined for an empty string; NaN keeps the result numeric
  return codePoint === undefined ? NaN : codePoint;
}
/**
 * Returns a random integer between min and max, both included.
 *
 * @param {number} min - lower bound; rounded up when it is not an integer
 * @param {number} max - upper bound; rounded down when it is not an integer
 * @returns {number} a pseudo-random integer (Math.random is not suitable for
 *   security-sensitive use)
 */
function randIntInclusiveBetween(min, max) {
  const low = Math.ceil(min);
  const high = Math.floor(max);
  return Math.floor(Math.random() * (high - low + 1)) + low;
}
/**
 * Builds one random word by picking letters (with repetition) from a list.
 *
 * @param {string[]} listOfLetters - the letters to choose from
 * @param {number} wordLength - number of letters in the word
 * @returns {string} the generated word; an empty string when wordLength is 0
 *   or when the list is empty
 */
function createOneWordEn(listOfLetters, wordLength) {
  const lettersForNewWord = [];
  for (let i = 0; i < wordLength; i++) {
    const index = randIntInclusiveBetween(0, listOfLetters.length - 1);
    lettersForNewWord.push(listOfLetters[index]);
  }
  // join('') renders a missing (undefined) letter as an empty string
  return lettersForNewWord.join('');
}
/**
 * Builds a list of random English practice words.
 *
 * @param {Iterable<string>} SetOfLetters - the letters to build words from
 * @param {number} NumberOfWords - number of words to generate
 * @param {number} MaxWordLength - maximum number of letters per word
 * @param {string} [lengthType='random'] - 'random' gives every word a random
 *   length between 1 and MaxWordLength; any other value makes every word
 *   exactly MaxWordLength long
 * @returns {string[]} the generated words
 */
function createManyWordsEn(SetOfLetters, NumberOfWords, MaxWordLength, lengthType = 'random') {
  // Converted once: the letters do not change between words
  const listOfLetters = Array.from(SetOfLetters);
  const output = [];
  for (let i = 0; i < NumberOfWords; i++) {
    const wordLength =
      lengthType === 'random'
        ? randIntInclusiveBetween(1, MaxWordLength)
        : MaxWordLength;
    output.push(createOneWordEn(listOfLetters, wordLength));
  }
  return output;
}
/**
 * Builds one random Korean word out of precomposed syllable blocks.
 *
 * @param {number[]} initialList - usable initial jamo indices (0-18)
 * @param {number[]} medialList - usable medial jamo indices (0-20)
 * @param {number[]} terminalList - usable terminal jamo indices (0-27, where
 *   0 means "no terminal"); an empty list is treated as "no terminal"
 * @param {number} numberOfSyllables - number of syllable blocks in the word
 * @returns {string} the generated word
 * @throws {RangeError} if a syllable is requested but there is no initial or
 *   no medial jamo to build it from
 */
function createOneWordKo(initialList, medialList, terminalList, numberOfSyllables) {
  const FIRST_SYLLABLE = 44032; // U+AC00
  const pickFrom = (list) => list[randIntInclusiveBetween(0, list.length - 1)];

  // A syllable block needs at least an initial consonant and a vowel
  if (
    numberOfSyllables > 0 &&
    (initialList.length === 0 || medialList.length === 0)
  ) {
    throw new RangeError(
      'at least one initial consonant and one vowel are needed to build a syllable'
    );
  }

  const listOfSyllableBlocks = [];
  for (let i = 0; i < numberOfSyllables; i++) {
    // Randomly choose from the initial, medial and terminal lists
    const initialLetter = pickFrom(initialList);
    const medialLetter = pickFrom(medialList);
    const terminalLetter = terminalList.length > 0 ? pickFrom(terminalList) : 0;

    // Calculate Unicode for syllable block: 588 = 21 medials * 28 terminals
    const syllableBlock =
      initialLetter * 588 + medialLetter * 28 + terminalLetter + FIRST_SYLLABLE;
    listOfSyllableBlocks.push(String.fromCharCode(syllableBlock));
  }
  return listOfSyllableBlocks.join('');
}
/**
 * Builds a list of random Korean practice words.
 *
 * @param {Set<string>} setOfCompatibilityJamo - the letters to build words from
 * @param {number} NumberOfWords - number of words to generate
 * @param {number} MaxWordLength - maximum number of syllable blocks per word
 * @param {string} [lengthType='random'] - 'random' gives every word a random
 *   length between 1 and MaxWordLength; any other value makes every word
 *   exactly MaxWordLength long. The default matches createManyWordsEn.
 * @returns {string[]} the generated words
 */
function createManyWordsKO(setOfCompatibilityJamo, NumberOfWords, MaxWordLength, lengthType = 'random') {
  // Look up the lists of usable initial, medial and terminal jamo indices
  const { initialJamoList, medialJamoList, terminalJamoList } =
    createJamoLists(setOfCompatibilityJamo);

  const output = [];
  for (let i = 0; i < NumberOfWords; i++) {
    const wordLength =
      lengthType === 'random'
        ? randIntInclusiveBetween(1, MaxWordLength)
        : MaxWordLength;
    output.push(
      createOneWordKo(initialJamoList, medialJamoList, terminalJamoList, wordLength)
    );
  }
  return output;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment