Skip to content

Instantly share code, notes, and snippets.

@RussellSprouts
Last active July 8, 2024 20:54
Show Gist options
  • Save RussellSprouts/6c5fbdf6689b8dc3f99406d28a9f7884 to your computer and use it in GitHub Desktop.
Save RussellSprouts/6c5fbdf6689b8dc3f99406d28a9f7884 to your computer and use it in GitHub Desktop.
Finding the syllable block with the most strokes in real Korean words
# To use, `sudo apt install hunspell-ko`.
import unicodedata
# Read the whole hunspell Korean word list. The `with` block closes the file
# handle as soon as the text is in memory, and the encoding is stated
# explicitly so the result does not depend on the locale's default.
# NOTE(review): assumes the dictionary is UTF-8 encoded - confirm against the
# installed ko.dic / ko.aff.
with open("/usr/share/myspell/dicts/ko.dic", "r", encoding="utf-8") as f:
    # Ensure the text is composed so that each syllable is a single
    # character.
    normalized_text = unicodedata.normalize('NFC', f.read())
# Stroke count for each Hangul jamo, keyed by the final word of the jamo's
# Unicode character name. Hyphenated names are not listed here: the scanning
# loop splits a name on '-' and sums the counts of its parts.
stroke_counts = {
    # Consonants. Each SSANG* entry is twice the count of its base consonant.
    'KIYEOK': 1, 'SSANGKIYEOK': 2,
    'NIEUN': 1,
    'TIKEUT': 2, 'SSANGTIKEUT': 4,
    'RIEUL': 3,
    'MIEUM': 3,
    'PIEUP': 4, 'SSANGPIEUP': 8,
    'SIOS': 2, 'SSANGSIOS': 4,
    'IEUNG': 1,
    'CIEUC': 2, 'SSANGCIEUC': 4,
    'CHIEUCH': 3, 'SSANGCHIEUCH': 6,
    'KHIEUKH': 2,
    'THIEUTH': 3,
    'PHIEUPH': 4,
    'HIEUH': 3,
    # Vowels.
    'A': 2, 'AE': 3, 'YA': 3, 'YAE': 4,
    'EO': 2, 'E': 3, 'YEO': 3, 'YE': 4,
    'O': 2, 'WA': 4, 'WAE': 5, 'OE': 3, 'YO': 3,
    'U': 2, 'WEO': 4, 'WE': 5, 'WI': 3, 'YU': 3,
    'EU': 1, 'YI': 2, 'I': 1,
}
def _syllable_strokes(syllable):
    """Return the total stroke count of one composed Hangul syllable.

    The syllable is decomposed (NFD) into its jamo. For each jamo, the last
    word of its Unicode name is split on '-' and every part is looked up in
    ``stroke_counts``.

    Raises:
        KeyError: if a jamo name part has no entry in ``stroke_counts``.
    """
    total = 0
    # Decompose the syllable into characters
    for jamo in unicodedata.normalize('NFD', syllable):
        jamo_name = unicodedata.name(jamo, '')
        if not jamo_name:
            # Skip an unnamed code point explicitly instead of silently
            # re-counting the previous jamo's name.
            continue
        parts = jamo_name.split(' ')[-1].split('-')
        total += sum(stroke_counts[part] for part in parts)
    return total


# Scan every dictionary line and report each syllable that sets a new record
# for stroke count, together with the line it was found on.
best_strokes = 0
for line in normalized_text.splitlines():
    for char in line:
        # name() with a default never raises, so a character without a
        # Unicode name is simply skipped rather than being judged by a
        # stale (or not yet defined) name from an earlier character.
        if 'HANGUL SYLLABLE' not in unicodedata.name(char, ''):
            continue
        total_strokes = _syllable_strokes(char)
        if total_strokes > best_strokes:
            print(char, total_strokes, line)
            best_strokes = total_strokes
# Results:
# To use, `sudo apt install hunspell-ko`.
import unicodedata
# Read the whole hunspell Korean word list. The `with` block closes the file
# handle as soon as the text is in memory, and the encoding is stated
# explicitly so the result does not depend on the locale's default.
# NOTE(review): assumes the dictionary is UTF-8 encoded - confirm against the
# installed ko.dic / ko.aff.
with open("/usr/share/myspell/dicts/ko.dic", "r", encoding="utf-8") as f:
    # Ensure the text is composed so that each syllable is a single
    # character.
    normalized_text = unicodedata.normalize('NFC', f.read())
# Stroke count for each Hangul jamo, keyed by the final word of the jamo's
# Unicode character name. Hyphenated names are not listed here: the scanning
# loop splits a name on '-' and sums the counts of its parts.
stroke_counts = {
    # Consonants. Each SSANG* entry is twice the count of its base consonant.
    'KIYEOK': 1, 'SSANGKIYEOK': 2,
    'NIEUN': 1,
    'TIKEUT': 2, 'SSANGTIKEUT': 4,
    'RIEUL': 3,
    'MIEUM': 3,
    'PIEUP': 4, 'SSANGPIEUP': 8,
    'SIOS': 2, 'SSANGSIOS': 4,
    'IEUNG': 1,
    'CIEUC': 2, 'SSANGCIEUC': 4,
    'CHIEUCH': 3, 'SSANGCHIEUCH': 6,
    'KHIEUKH': 2,
    'THIEUTH': 3,
    'PHIEUPH': 4,
    'HIEUH': 3,
    # Vowels.
    'A': 2, 'AE': 3, 'YA': 3, 'YAE': 4,
    'EO': 2, 'E': 3, 'YEO': 3, 'YE': 4,
    'O': 2, 'WA': 4, 'WAE': 5, 'OE': 3, 'YO': 3,
    'U': 2, 'WEO': 4, 'WE': 5, 'WI': 3, 'YU': 3,
    'EU': 1, 'YI': 2, 'I': 1,
}
def _syllable_strokes(syllable):
    """Return the total stroke count of one composed Hangul syllable.

    The syllable is decomposed (NFD) into its jamo. For each jamo, the last
    word of its Unicode name is split on '-' and every part is looked up in
    ``stroke_counts``.

    Raises:
        KeyError: if a jamo name part has no entry in ``stroke_counts``.
    """
    total = 0
    # Decompose the syllable into characters
    for jamo in unicodedata.normalize('NFD', syllable):
        jamo_name = unicodedata.name(jamo, '')
        if not jamo_name:
            # Skip an unnamed code point explicitly instead of silently
            # re-counting the previous jamo's name.
            continue
        parts = jamo_name.split(' ')[-1].split('-')
        total += sum(stroke_counts[part] for part in parts)
    return total


# Scan every dictionary line and report each syllable whose stroke count
# matches or beats the best seen so far (`>=`, so ties are printed too),
# together with the line it was found on.
best_strokes = 0
for line in normalized_text.splitlines():
    for char in line:
        # name() with a default never raises, so a character without a
        # Unicode name is simply skipped rather than being judged by a
        # stale (or not yet defined) name from an earlier character.
        if 'HANGUL SYLLABLE' not in unicodedata.name(char, ''):
            continue
        total_strokes = _syllable_strokes(char)
        if total_strokes >= best_strokes:
            print(char, total_strokes, line)
            best_strokes = total_strokes
# Results:
# 뺄 14 내뺄/4
# 뽑 14 내뽑다/44
# 뽑 14 내뽑아/2
# 뽑 14 내뽑은/3
# 뽑 14 내뽑을/4
# 뺌 14 발뺌/10
# 뺌 14 발뺌하다/44
# 뺌 14 발뺌하여/2
# 뺌 14 발뺌한/3
# 뺌 14 발뺌할/4
# 뺌 14 발뺌해/2
# 뺄 14 뺄/4
# 뺄 14 뺄셈/10
# 뺨 14 뺨/10
# 뺨 14 뺨따귀/25
# 뺨 14 뺨쳐/2
# 뺨 14 뺨치다/44
# 뺨 14 뺨치어/2
# 뺨 14 뺨친/3
# 뺨 14 뺨칠/4
# 뼘 14 뼘/10
# 뽑 14 뽑다/44
# 뽑 14 뽑아/2
# 뽑 14 뽑아내다/44
# 뽑 14 뽑은/3
# 뽑 14 뽑을/4
# 뽑 14 뽑혀/2
# 뽑 14 뽑히다/44
# 뽑 14 뽑히어/2
# 뽑 14 뽑힌/3
# 뽑 14 뽑힐/4
# 뺄 14 용뺄/4
# 뽑 14 제비뽑기/25
# 뼘 14 한뼘바지/25
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment