Skip to content

Instantly share code, notes, and snippets.

@colinjroberts
Created March 4, 2022 05:05
Show Gist options
  • Save colinjroberts/6e48bfb4ae664a3eb1527d03c5bcad87 to your computer and use it in GitHub Desktop.
Save colinjroberts/6e48bfb4ae664a3eb1527d03c5bcad87 to your computer and use it in GitHub Desktop.
From korean-typing-practice-part1: A Flask App that generates random English words and deduplicates Korean letters
from string import ascii_letters, ascii_lowercase
from flask import Flask, request
from random import randint
# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
@app.route('/')
def hello():
    """Return a plain-text usage hint for the ``/text`` route.

    The hint lists all three query arguments that ``/text`` requires
    (``letters``, ``count`` and ``lang``) so that the example URL it shows
    is one the route actually accepts.
    """
    # Adjacent literals are used instead of backslash continuations inside
    # one literal, so source indentation can never leak into the response.
    return (
        "Use the route /text with the following arguments: letters to use "
        "in the words, number of words to generate, and language "
        "('EN' or 'KO'). "
        "e.g. localhost:5000/text?letters=abcde&count=5&lang=EN"
    )
@app.route('/text', methods=['GET'])
def get_text():
    """Validate the ``/text`` query arguments and dispatch by language.

    Query arguments read from the request:
        count:   number of words to generate; must parse as an int > 0.
        letters: the letters to build the output from; must be non-empty.
        lang:    one of ``ACCEPTED_LANGUAGES`` ('EN' or 'KO').

    Returns:
        Whatever the language handler returns (``handle_EN`` or
        ``handle_KO``).

    Raises:
        ValueError: if an argument is missing, ``count`` is not an integer
            or is not positive, or ``lang`` is not an accepted language.
    """
    MAX_WORD_LENGTH = 6
    ACCEPTED_LANGUAGES = ('EN', 'KO')

    # Get arguments and return helpful errors if missing
    args = request.args.to_dict()
    raw_count = args.get('count')
    letters_provided = args.get('letters')
    language = args.get('lang')

    # Parse the count defensively: a missing value (None) or non-numeric
    # text must end up in the descriptive ValueError below rather than
    # escaping as a bare TypeError/ValueError from int().
    try:
        count_of_words_to_return = int(raw_count)
    except (TypeError, ValueError):
        count_of_words_to_return = None

    if (count_of_words_to_return is None
            or not letters_provided
            or not language):
        raise ValueError(f"int count expected, {raw_count} was "
                         f"provided; string letters expected, "
                         f"{letters_provided} was provided, "
                         f"string lang was expected, {language} was provided.")
    if count_of_words_to_return <= 0:
        raise ValueError(f"int count must be greater than 0: "
                         f"{count_of_words_to_return} was provided")
    if language not in ACCEPTED_LANGUAGES:
        raise ValueError(f"Langauge must be one of {ACCEPTED_LANGUAGES}: "
                         f"{language} was provided")

    # language is now known to be one of ACCEPTED_LANGUAGES.
    if language == 'EN':
        return handle_EN(count_of_words_to_return, letters_provided, MAX_WORD_LENGTH)
    return handle_KO(count_of_words_to_return, letters_provided, MAX_WORD_LENGTH)
def handle_EN(count_of_words_to_return, letters_provided, max_word_length):
    """Generate random English "words" from the letters the caller supplied.

    Args:
        count_of_words_to_return: how many words to generate.
        letters_provided: raw input; only its ASCII letters are used.
        max_word_length: upper bound (inclusive) on each word's length.

    Returns:
        The generated words joined with ", ".

    Raises:
        ValueError: if ``letters_provided`` contains no ASCII letters.
    """
    # Deduplicate list of letters
    set_of_letters = deduplicate_letters_EN(letters_provided)

    # Throw an error if there are no ascii letters in the set. Report the
    # caller's original input: the filtered set is empty at this point, so
    # printing it would tell the caller nothing.
    if not set_of_letters:
        raise ValueError(f"string letters expected at least 1 ascii letter, "
                         f"{letters_provided} was provided")

    # Create and return the number of requested words
    output = create_many_words_EN(set_of_letters, count_of_words_to_return, max_word_length)
    return ", ".join(output)
def deduplicate_letters_EN(list_of_letters):
    """Return the set of lowercased ASCII letters found in the input.

    Items that are not found in ``string.ascii_letters`` are dropped;
    upper- and lowercase forms of a letter collapse to the lowercase one.
    """
    return {
        candidate.lower()
        for candidate in list_of_letters
        if candidate in ascii_letters
    }
def create_one_word_EN(list_of_letters, word_length):
    """Build one word of ``word_length`` letters picked from the list.

    Every position is filled independently by indexing ``list_of_letters``
    with a ``randint``-chosen index, so a letter may appear more than once.
    """
    return "".join(
        list_of_letters[randint(0, len(list_of_letters) - 1)]
        for _ in range(word_length)
    )
def create_many_words_EN(set_of_letters, number_of_words, max_word_length):
    """Generate ``number_of_words`` random words from ``set_of_letters``.

    Args:
        set_of_letters: the letters each word may be built from.
        number_of_words: how many words to produce.
        max_word_length: each word's length is drawn with
            ``randint(1, max_word_length)``.

    Returns:
        A list of ``number_of_words`` strings.
    """
    # Convert to an indexable list once; the letters do not change between
    # words, so rebuilding the list on every iteration is wasted work.
    letters = list(set_of_letters)
    return [
        create_one_word_EN(letters, randint(1, max_word_length))
        for _ in range(number_of_words)
    ]
def handle_KO(count_of_words_to_return, letters_provided, max_word_length):
    """Return the distinct Korean letters found in ``letters_provided``.

    The input is first bucketed by ``filter_input_KO``; if every bucket is
    empty a ``ValueError`` is raised. Otherwise the buckets are reduced to a
    set of letters by ``deduplicate_letters_KO`` and joined with ", ".

    ``count_of_words_to_return`` and ``max_word_length`` are accepted but
    not used by this function.
    """
    korean_buckets = filter_input_KO(letters_provided)

    # Reject input that contained no Korean characters at all.
    has_korean = any(len(bucket) > 0 for bucket in korean_buckets.values())
    if not has_korean:
        raise ValueError(f"string letters expected at least 1 korean letter, "
                         f"{letters_provided} was provided.")

    # Reduce the buckets to a deduplicated set and render it.
    return ", ".join(deduplicate_letters_KO(korean_buckets))
def deduplicate_letters_KO(dict_of_filtered_input):
    """Collapse bucketed Korean input into a set of individual letters.

    ``dict_of_filtered_input`` must provide the keys "compatibility_jamo",
    "jamo" and "syllable". Compatibility jamo are taken as they are, each
    jamo is converted with ``lookup_jamo_KO``, and each syllable is split
    with ``decompose_words_KO``.
    """
    letters = list(dict_of_filtered_input["compatibility_jamo"])
    letters += [
        letter
        for jamo in dict_of_filtered_input["jamo"]
        for letter in lookup_jamo_KO(jamo)
    ]
    letters += [
        letter
        for syllable in dict_of_filtered_input["syllable"]
        for letter in decompose_words_KO(syllable)
    ]
    # A set drops the duplicates gathered from the three sources.
    return set(letters)
def filter_input_KO(letter_input):
    """Sort the characters of ``letter_input`` into Hangul buckets.

    Each character is compared, by code point, against three inclusive
    ranges:

        "jamo"                U+1100 - U+11FF
        "syllable"            U+AC00 - U+D7A3
        "compatibility_jamo"  U+3130 - U+318F

    Returns a dict with those three keys, each holding the list of matching
    characters in input order. Characters outside every range are dropped.
    """
    hangul_ranges = {
        "jamo": (0x1100, 0x11FF),
        "syllable": (0xAC00, 0xD7A3),
        "compatibility_jamo": (0x3130, 0x318F),
    }
    buckets = {name: [] for name in hangul_ranges}
    for character in letter_input:
        for name, (lowest, highest) in hangul_ranges.items():
            if lowest <= ord(character) <= highest:
                buckets[name].append(character)
    return buckets
def decompose_words_KO(list_of_filtered_input):
    """Split precomposed Hangul syllables into their individual letters.

    Args:
        list_of_filtered_input: an iterable of single characters (a string
            works). Characters outside the precomposed syllable block
            U+AC00 - U+D7A3 are skipped, because the arithmetic below is
            only meaningful inside that block.

    Returns:
        A flat list of the letters of every syllable, in input order, each
        converted through ``lookup_jamo_KO``.
    """
    syllable_first = 0xAC00   # first precomposed syllable
    syllable_last = 0xD7A3    # last precomposed syllable
    initial_first = 0x1100    # first initial-consonant jamo
    medial_first = 0x1161     # first vowel jamo
    terminal_before = 0x11A7  # one below the first final-consonant jamo
    terminals_per_medial = 28           # 27 finals + "no final"
    codes_per_initial = 21 * 28         # 588 syllables share each initial

    output = []
    for item in list_of_filtered_input:
        offset = ord(item) - syllable_first
        if not 0 <= offset <= syllable_last - syllable_first:
            continue

        # offset == (initial * 21 + medial) * 28 + terminal
        initial_index, remainder = divmod(offset, codes_per_initial)
        medial_index, terminal_index = divmod(remainder, terminals_per_medial)

        jamo = [
            chr(initial_first + initial_index),
            chr(medial_first + medial_index),
        ]
        # terminal_index 0 means the syllable has no final consonant.
        if terminal_index:
            jamo.append(chr(terminal_before + terminal_index))

        # Convert each conjoining jamo to its compatibility letter(s).
        for j in jamo:
            output.extend(lookup_jamo_KO(j))
    return output
# Conjoining Hangul Jamo -> Hangul Compatibility Jamo letter(s).
# Code points are written as escapes because the conjoining and
# compatibility forms of a letter look identical in most fonts; the trailing
# comment shows the resulting letter(s).
_JAMO_TO_COMPATIBILITY_KO = {
    # Initial consonants, U+1100 - U+1112
    "\u1100": ("\u3131",),  # ㄱ
    "\u1101": ("\u3132",),  # ㄲ
    "\u1102": ("\u3134",),  # ㄴ
    "\u1103": ("\u3137",),  # ㄷ
    "\u1104": ("\u3138",),  # ㄸ
    "\u1105": ("\u3139",),  # ㄹ
    "\u1106": ("\u3141",),  # ㅁ
    "\u1107": ("\u3142",),  # ㅂ
    "\u1108": ("\u3143",),  # ㅃ
    "\u1109": ("\u3145",),  # ㅅ
    "\u110a": ("\u3146",),  # ㅆ
    "\u110b": ("\u3147",),  # ㅇ
    "\u110c": ("\u3148",),  # ㅈ
    "\u110d": ("\u3149",),  # ㅉ
    "\u110e": ("\u314a",),  # ㅊ
    "\u110f": ("\u314b",),  # ㅋ
    "\u1110": ("\u314c",),  # ㅌ
    "\u1111": ("\u314d",),  # ㅍ
    "\u1112": ("\u314e",),  # ㅎ
    # Vowels, U+1161 - U+1175 (compound vowels are split into their parts)
    "\u1161": ("\u314f",),  # ㅏ
    "\u1162": ("\u3150",),  # ㅐ
    "\u1163": ("\u3151",),  # ㅑ
    "\u1164": ("\u3152",),  # ㅒ
    "\u1165": ("\u3153",),  # ㅓ
    "\u1166": ("\u3154",),  # ㅔ
    "\u1167": ("\u3155",),  # ㅕ
    "\u1168": ("\u3156",),  # ㅖ
    "\u1169": ("\u3157",),  # ㅗ
    "\u116a": ("\u3157", "\u314f"),  # ㅗ ㅏ
    "\u116b": ("\u3157", "\u3150"),  # ㅗ ㅐ
    "\u116c": ("\u3157", "\u3163"),  # ㅗ ㅣ
    "\u116d": ("\u315b",),  # ㅛ
    "\u116e": ("\u315c",),  # ㅜ
    "\u116f": ("\u315c", "\u3153"),  # ㅜ ㅓ
    "\u1170": ("\u315c", "\u3154"),  # ㅜ ㅔ
    "\u1171": ("\u315c", "\u3163"),  # ㅜ ㅣ
    "\u1172": ("\u3160",),  # ㅠ
    "\u1173": ("\u3161",),  # ㅡ
    "\u1174": ("\u3161", "\u3163"),  # ㅡ ㅣ
    "\u1175": ("\u3163",),  # ㅣ
    # Final consonants, U+11A8 - U+11C2 (clusters are split into their parts)
    "\u11a8": ("\u3131",),  # ㄱ
    "\u11a9": ("\u3132",),  # ㄲ
    "\u11aa": ("\u3131", "\u3145"),  # ㄱ ㅅ
    "\u11ab": ("\u3134",),  # ㄴ
    "\u11ac": ("\u3134", "\u3148"),  # ㄴ ㅈ
    "\u11ad": ("\u3134", "\u314e"),  # ㄴ ㅎ
    "\u11ae": ("\u3137",),  # ㄷ
    "\u11af": ("\u3139",),  # ㄹ
    "\u11b0": ("\u3139", "\u3131"),  # ㄹ ㄱ
    "\u11b1": ("\u3139", "\u3141"),  # ㄹ ㅁ
    "\u11b2": ("\u3139", "\u3142"),  # ㄹ ㅂ
    "\u11b3": ("\u3139", "\u3145"),  # ㄹ ㅅ
    "\u11b4": ("\u3139", "\u314c"),  # ㄹ ㅌ
    "\u11b5": ("\u3139", "\u314d"),  # ㄹ ㅍ
    "\u11b6": ("\u3139", "\u314e"),  # ㄹ ㅎ
    "\u11b7": ("\u3141",),  # ㅁ
    "\u11b8": ("\u3142",),  # ㅂ
    "\u11b9": ("\u3142", "\u3145"),  # ㅂ ㅅ
    "\u11ba": ("\u3145",),  # ㅅ
    "\u11bb": ("\u3146",),  # ㅆ
    "\u11bc": ("\u3147",),  # ㅇ
    "\u11bd": ("\u3148",),  # ㅈ
    "\u11be": ("\u314a",),  # ㅊ
    "\u11bf": ("\u314b",),  # ㅋ
    "\u11c0": ("\u314c",),  # ㅌ
    "\u11c1": ("\u314d",),  # ㅍ
    "\u11c2": ("\u314e",),  # ㅎ
}


def lookup_jamo_KO(letter_block):
    """Convert one conjoining Hangul jamo to compatibility jamo letters.

    Initial and final forms of the same consonant map to the same
    compatibility letter: for example both U+1100 and U+11A8 become
    U+3131. Compound vowels and final-consonant clusters are returned as
    their component letters.

    Args:
        letter_block: a single character.

    Returns:
        A new list of compatibility jamo letters, or an empty list when
        ``letter_block`` is not in the table. No error is raised for
        unknown characters.
    """
    # Copy into a fresh list so callers may mutate the result freely
    # without touching the shared table.
    return list(_JAMO_TO_COMPATIBILITY_KO.get(letter_block, ()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment