Skip to content

Instantly share code, notes, and snippets.

@colinjroberts
Created March 4, 2022 05:07
Show Gist options
  • Save colinjroberts/6ff78fdced21f389cf38b3dc58a8feb0 to your computer and use it in GitHub Desktop.
Save colinjroberts/6ff78fdced21f389cf38b3dc58a8feb0 to your computer and use it in GitHub Desktop.
From korean-typing-practice-part1: A Flask app that generates random English and Korean "words"
from string import ascii_letters, ascii_lowercase
from flask import Flask, request
from random import randint
app = Flask(__name__)
@app.route('/')
def hello():
    """Return a plain-text usage hint for the /text route.

    The hint lists all three query arguments that /text requires
    (letters, count and lang) and shows an example URL that /text accepts.
    """
    # Adjacent literals are used instead of backslash continuations so that
    # source indentation never leaks into the response text.
    return ("Use the route /text with the following arguments: letters to use "
            "in the words, number of words to generate, and language "
            "(EN or KO). "
            "e.g. localhost:5000/text?letters=abcde&count=5&lang=EN")
@app.route('/text', methods=['GET'])
def get_text():
    """Return a comma-separated string of random practice "words".

    Query arguments (all required):
        count: number of words to generate; an integer greater than 0.
        letters: characters the words may be built from.
        lang: language of the words, one of 'EN' or 'KO'.

    Raises:
        ValueError: if an argument is missing or empty, if count is not an
            integer greater than 0, or if lang is not an accepted language.
    """
    MAX_WORD_LENGTH = 6
    ACCEPTED_LANGUAGES = ('EN', 'KO')

    # Get arguments and return helpful errors if missing
    args = request.args.to_dict()
    count_provided = args.get('count')
    letters_provided = args.get('letters')
    language = args.get('lang')

    # Presence is checked on the raw strings, before any conversion, so a
    # missing count produces this message rather than a TypeError from int().
    if not (count_provided and letters_provided and language):
        raise ValueError(f"int count expected, {count_provided} was "
                         f"provided; string letters expected, "
                         f"{letters_provided} was provided, "
                         f"string lang was expected, {language} was provided.")

    try:
        count_of_words_to_return = int(count_provided)
    except ValueError:
        raise ValueError(f"int count expected, {count_provided} was "
                         f"provided") from None

    if count_of_words_to_return <= 0:
        raise ValueError(f"int count must be greater than 0: "
                         f"{count_of_words_to_return} was provided")
    if language not in ACCEPTED_LANGUAGES:
        raise ValueError(f"Language must be one of {ACCEPTED_LANGUAGES}: "
                         f"{language} was provided")

    # language is guaranteed to be 'EN' or 'KO' at this point.
    if language == 'EN':
        return handle_EN(count_of_words_to_return, letters_provided, MAX_WORD_LENGTH)
    return handle_KO(count_of_words_to_return, letters_provided, MAX_WORD_LENGTH)
def handle_EN(count_of_words_to_return, letters_provided, max_word_length):
    """Build a comma-separated string of random English practice words.

    Args:
        count_of_words_to_return: number of words to generate.
        letters_provided: characters supplied by the caller; only ASCII
            letters are kept, lower-cased and deduplicated.
        max_word_length: upper bound (inclusive) on the length of each word.

    Returns:
        The generated words joined with ", ".

    Raises:
        ValueError: if letters_provided contains no ASCII letter.
    """
    # Deduplicate list of letters
    set_of_letters = deduplicate_letters_EN(letters_provided)

    # Report the caller's original input: the filtered set is always empty
    # when this error fires, so printing it would carry no information.
    if not set_of_letters:
        raise ValueError("string letters expected at least 1 ascii letter, "
                         f"{letters_provided} was provided")

    # Create and return the number of requested words
    output = create_many_words_EN(set_of_letters, count_of_words_to_return, max_word_length)
    return ", ".join(output)
def deduplicate_letters_EN(list_of_letters):
    """Return the set of distinct ASCII letters in the input, lower-cased.

    Anything that is not found in ``ascii_letters`` is dropped.
    """
    return {
        candidate.lower()
        for candidate in list_of_letters
        if candidate in ascii_letters
    }
def create_one_word_EN(list_of_letters, word_length):
    """Return a string of ``word_length`` letters drawn at random.

    Each letter is picked independently from ``list_of_letters`` by a
    random index, so letters may repeat.
    """
    return "".join(
        list_of_letters[randint(0, len(list_of_letters) - 1)]
        for _ in range(word_length)
    )
def create_many_words_EN(set_of_letters, number_of_words, max_word_length):
    """Return a list of ``number_of_words`` random English "words".

    Every word gets its own random length between 1 and
    ``max_word_length`` (inclusive) and is built from ``set_of_letters``.
    """
    def random_word():
        # Length is drawn first, then the letters for that word.
        length = randint(1, max_word_length)
        return create_one_word_EN(list(set_of_letters), length)

    return [random_word() for _ in range(number_of_words)]
def handle_KO(count_of_words_to_return, letters_provided, max_word_length):
    """Build a comma-separated string of random Korean practice words.

    The input is first filtered down to Hangul characters; a ValueError is
    raised when none remain. The surviving characters are reduced to a set
    of letters from which the requested number of words is generated.
    """
    # Keep only the Hangul characters, grouped by kind
    hangul_by_kind = filter_input_KO(letters_provided)

    # Every group being empty means no Korean letter was supplied
    if all(len(characters) == 0 for characters in hangul_by_kind.values()):
        raise ValueError("string letters expected at least 1 korean letter, "
                         f"{letters_provided} was provided.")

    # Reduce to a deduplicated set of letters, then build the words
    korean_letters = deduplicate_letters_KO(hangul_by_kind)
    words = create_many_words_KO(korean_letters, count_of_words_to_return, max_word_length)
    return ", ".join(words)
def deduplicate_letters_KO(dict_of_filtered_input):
    """Return the set of letters found in the filtered Hangul input.

    Reads the "compatibility_jamo", "jamo" and "syllable" entries of the
    given dict. Compatibility jamo are taken as they are, jamo are
    converted through ``lookup_jamo_KO`` and syllables are broken up by
    ``decompose_words_KO``.
    """
    unique_letters = set(dict_of_filtered_input["compatibility_jamo"])
    for conjoining_jamo in dict_of_filtered_input["jamo"]:
        unique_letters.update(lookup_jamo_KO(conjoining_jamo))
    for syllable_block in dict_of_filtered_input["syllable"]:
        unique_letters.update(decompose_words_KO(syllable_block))
    return unique_letters
def filter_input_KO(letter_input):
    """Sort the Hangul characters of ``letter_input`` into three groups.

    Each character is classified by its Unicode code point:

    * "jamo": U+1100 to U+11FF
    * "syllable": U+AC00 to U+D7A3
    * "compatibility_jamo": U+3130 to U+318F

    Characters outside all three ranges are dropped. Returns a dict with
    exactly those three keys, each holding a list of the matching
    characters in input order (duplicates are kept).
    """
    jamo_found = []
    syllables_found = []
    compatibility_found = []

    for character in letter_input:
        code_point = ord(character)
        # The three ranges do not overlap, so at most one branch applies.
        if 0x1100 <= code_point <= 0x11FF:
            jamo_found.append(character)
        elif 0xAC00 <= code_point <= 0xD7A3:
            syllables_found.append(character)
        elif 0x3130 <= code_point <= 0x318F:
            compatibility_found.append(character)

    return {
        "jamo": jamo_found,
        "syllable": syllables_found,
        "compatibility_jamo": compatibility_found,
    }
def decompose_words_KO(list_of_filtered_input):
    """Break precomposed Hangul syllables into their individual letters.

    Args:
        list_of_filtered_input: an iterable of single characters (a string
            works too). Only precomposed syllables (U+AC00 to U+D7A3) are
            decomposed; any other character is skipped, because the
            syllable arithmetic is meaningless outside that range.

    Returns:
        A list with the letters of every syllable, in order, as returned by
        ``lookup_jamo_KO`` for the initial, medial and (if present)
        terminal jamo of each syllable.
    """
    SYLLABLE_FIRST = 0xAC00   # 44032
    SYLLABLE_LAST = 0xD7A3
    INITIAL_BASE = 0x1100     # first initial jamo
    MEDIAL_BASE = 0x1161      # first medial jamo
    TERMINAL_BASE = 0x11A7    # one below the first terminal jamo (index 0 = none)
    SYLLABLES_PER_INITIAL = 588   # 21 medials * 28 terminal slots
    TERMINAL_SLOTS = 28           # 27 terminals + "no terminal"

    output = []
    for item in list_of_filtered_input:
        code_point = ord(item)
        if not SYLLABLE_FIRST <= code_point <= SYLLABLE_LAST:
            continue

        # code_point = base + initial * 588 + medial * 28 + terminal.
        # The initial index is a plain floor division of the offset; adding
        # 1 before dividing would push the last syllable of every initial
        # group into the next group.
        offset = code_point - SYLLABLE_FIRST
        initial_index, remainder = divmod(offset, SYLLABLES_PER_INITIAL)
        medial_index, terminal_index = divmod(remainder, TERMINAL_SLOTS)

        jamo = [chr(INITIAL_BASE + initial_index), chr(MEDIAL_BASE + medial_index)]
        if terminal_index:
            jamo.append(chr(TERMINAL_BASE + terminal_index))

        # Convert each jamo to its compatibility letters
        for j in jamo:
            output.extend(lookup_jamo_KO(j))
    return output
# Spellings of the Unicode Hangul Jamo in compatibility jamo. Each tuple is
# listed in code point order starting at the base given next to it; a
# composite jamo is spelled as the sequence of simple letters it is made of.
_INITIAL_JAMO_BASE = 0x1100
_INITIAL_JAMO_SPELLINGS = (
    "ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ", "ㅅ",
    "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
)
_MEDIAL_JAMO_BASE = 0x1161
_MEDIAL_JAMO_SPELLINGS = (
    "ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ",
    "ㅗㅏ", "ㅗㅐ", "ㅗㅣ", "ㅛ", "ㅜ",
    "ㅜㅓ", "ㅜㅔ", "ㅜㅣ", "ㅠ", "ㅡ", "ㅡㅣ", "ㅣ",
)
_TERMINAL_JAMO_BASE = 0x11A8
_TERMINAL_JAMO_SPELLINGS = (
    "ㄱ", "ㄲ", "ㄱㅅ", "ㄴ", "ㄴㅈ", "ㄴㅎ", "ㄷ", "ㄹ",
    "ㄹㄱ", "ㄹㅁ", "ㄹㅂ", "ㄹㅅ", "ㄹㅌ", "ㄹㅍ", "ㄹㅎ",
    "ㅁ", "ㅂ", "ㅂㅅ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
)


def _build_jamo_lookup():
    """Return a dict mapping each Hangul Jamo to a tuple of compatibility jamo."""
    table = {}
    for base, spellings in (
        (_INITIAL_JAMO_BASE, _INITIAL_JAMO_SPELLINGS),
        (_MEDIAL_JAMO_BASE, _MEDIAL_JAMO_SPELLINGS),
        (_TERMINAL_JAMO_BASE, _TERMINAL_JAMO_SPELLINGS),
    ):
        for offset, spelling in enumerate(spellings):
            table[chr(base + offset)] = tuple(spelling)
    return table


# Built once at import time instead of on every lookup.
_JAMO_TO_COMPATIBILITY = _build_jamo_lookup()


def lookup_jamo_KO(letter_block):
    """Convert one Unicode Hangul Jamo into a list of compatibility jamo.

    Lookup keys are the initial (U+1100 to U+1112), medial (U+1161 to
    U+1175) and terminal (U+11A8 to U+11C2) Hangul Jamo. Their values are
    Hangul Compatibility Jamo, which have only one representation per
    letter: the initial jamo U+1100 and the terminal jamo U+11A8 both
    become the compatibility jamo U+3131. Composite jamo yield the simple
    letters they are made of, so U+116A becomes ["ㅗ", "ㅏ"].

    Args:
        letter_block: a single character.

    Returns:
        A new list of compatibility jamo, or an empty list when
        letter_block is not one of the lookup keys.
    """
    # A fresh list is returned so callers can never mutate the shared table.
    return list(_JAMO_TO_COMPATIBILITY.get(letter_block, ()))
def create_jamo_lists(set_of_compatibility_jamo):
    """Translate compatibility jamo into syllable-position indices.

    Returns three lists (initial, medial, terminal) of integer indices
    for the letters found in ``set_of_compatibility_jamo``. A letter that
    can serve in several positions contributes to each matching list.
    The terminal list always starts with 0, meaning "no terminal".
    Composite medials and terminals are appended after the simple ones
    whenever both of their component letters are present.
    """
    # Initial consonants are numbered consecutively in this order.
    initial_index_of = {
        jamo: index
        for index, jamo in enumerate("ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ")
    }
    # Simple vowels; the gaps belong to the composite vowels further down.
    medial_index_of = {
        'ㅏ': 0, 'ㅐ': 1, 'ㅑ': 2, 'ㅒ': 3, 'ㅓ': 4, 'ㅔ': 5, 'ㅕ': 6,
        'ㅖ': 7, 'ㅗ': 8, 'ㅛ': 12, 'ㅜ': 13, 'ㅠ': 17, 'ㅡ': 18, 'ㅣ': 20,
    }
    # Simple final consonants; the gaps belong to the composite finals.
    terminal_index_of = {
        'ㄱ': 1, 'ㄲ': 2, 'ㄴ': 4, 'ㄷ': 7, 'ㄹ': 8, 'ㅁ': 16, 'ㅂ': 17,
        'ㅅ': 19, 'ㅆ': 20, 'ㅇ': 21, 'ㅈ': 22, 'ㅊ': 23, 'ㅋ': 24,
        'ㅌ': 25, 'ㅍ': 26, 'ㅎ': 27,
    }
    # (first letter, second letter, index of the composite they form)
    composite_medials = (
        ('ㅗ', 'ㅏ', 9), ('ㅗ', 'ㅐ', 10), ('ㅗ', 'ㅣ', 11),
        ('ㅜ', 'ㅓ', 14), ('ㅜ', 'ㅔ', 15), ('ㅜ', 'ㅣ', 16),
        ('ㅡ', 'ㅣ', 19),
    )
    composite_terminals = (
        ('ㄱ', 'ㅅ', 3),
        ('ㄴ', 'ㅈ', 5), ('ㄴ', 'ㅎ', 6),
        ('ㄹ', 'ㄱ', 9), ('ㄹ', 'ㅁ', 10), ('ㄹ', 'ㅂ', 11), ('ㄹ', 'ㅅ', 12),
        ('ㄹ', 'ㅌ', 13), ('ㄹ', 'ㅍ', 14), ('ㄹ', 'ㅎ', 15),
        ('ㅂ', 'ㅅ', 18),
    )

    initials = []
    medials = []
    terminals = [0]  # a syllable block may have no final consonant at all

    # Simple letters, in the iteration order of the input
    for jamo in set_of_compatibility_jamo:
        if jamo in initial_index_of:
            initials.append(initial_index_of[jamo])
        if jamo in medial_index_of:
            medials.append(medial_index_of[jamo])
        if jamo in terminal_index_of:
            terminals.append(terminal_index_of[jamo])

    # Composite letters, available only when both parts were supplied
    for first, second, index in composite_medials:
        if first in set_of_compatibility_jamo and second in set_of_compatibility_jamo:
            medials.append(index)
    for first, second, index in composite_terminals:
        if first in set_of_compatibility_jamo and second in set_of_compatibility_jamo:
            terminals.append(index)

    return initials, medials, terminals
def create_one_word_KO(initial_list, medial_list, terminal_list, number_of_syllables):
    """Return a random word of ``number_of_syllables`` Hangul syllable blocks.

    For every block one index is drawn at random from each of the three
    lists (initial, then medial, then terminal) and combined into the
    code point 44032 + initial * 588 + medial * 28 + terminal.
    """
    def pick(options):
        return options[randint(0, len(options) - 1)]

    blocks = []
    for _ in range(number_of_syllables):
        lead = pick(initial_list)
        vowel = pick(medial_list)
        tail = pick(terminal_list)
        blocks.append(chr(44032 + lead * 588 + vowel * 28 + tail))
    return "".join(blocks)
def create_many_words_KO(set_of_compatibility_jamo, number_of_words, max_word_length):
    """Return a list of ``number_of_words`` random Korean "words".

    The letters are first translated into initial, medial and terminal
    index lists. A ValueError is raised when the initial or the medial
    list is empty. Each word gets a random syllable count between 1 and
    ``max_word_length`` (inclusive).
    """
    initials, medials, terminals = create_jamo_lists(set_of_compatibility_jamo)

    # A syllable block cannot be built without an initial and a vowel
    if not initials or not medials:
        raise ValueError("There must be at least one initial letter and at least one vowel.")

    return [
        create_one_word_KO(initials, medials, terminals, randint(1, max_word_length))
        for _ in range(number_of_words)
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment