Skip to content

Instantly share code, notes, and snippets.

@morinted
Created April 21, 2021 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save morinted/ad6fa98ecca4ee06c5c0399737d97997 to your computer and use it in GitHub Desktop.
Save morinted/ad6fa98ecca4ee06c5c0399737d97997 to your computer and use it in GitHub Desktop.
Generate diacritic fingerspelling dictionary for Plover
# Save as diacritics.py, then run `python3 diacritics.py`; the generated dictionary is printed to stdout as JSON.
import unicodedata
import json
# One row per diacritic. Each row is laid out as:
#   [diacritic stroke, combining code point, *precomposed lowercase letters]
# The generation code unpacks the first two items and treats every remaining
# item as an accented letter; uppercase forms are derived with str.upper(),
# so only lowercase letters are listed. Every letter must appear only once
# per row, otherwise the same dictionary keys are generated twice.
characters = [
    [  # Acute
        'KRAO*UT', '\u0301', 'á', 'é', 'í', 'ó', 'ú',
    ],
    [  # Grave
        'TKPWRA*F', '\u0300', 'à', 'è', 'ì', 'ò', 'ù',
    ],
    [  # Circumflex
        'KREURBG', '\u0302', 'â', 'ê', 'î', 'ô', 'û',
    ],
    [  # Umlaut
        '*UPLT', '\u0308', 'ä', 'ë', 'ï', 'ö', 'ü',
    ],
    [  # Tilde
        '*LTD', '\u0303', 'ã', 'ñ', 'õ',
    ],
    [  # Ring
        '*RNG', '\u030A', 'å',
    ],
    [  # Cedilla
        'STKEUL', '\u0327', 'ç',
    ],
]
# Base (unaccented) lowercase letter -> the stroke used to fingerspell it.
# The generation code looks up each accented letter's ASCII base here.
fingerspelling = {
    'a': 'A*',
    'c': 'KR*',
    'e': '*E',
    'i': '*EU',
    'n': 'TPH*',
    'o': 'O*',
    'u': '*U',
    'y': 'KWR*',
}
# Appended to a letter's fingerspelling stroke to build the stroke for its
# uppercase form (for example '*E' becomes '*EP').
uppercase_fingerspelling_stroke_fragment = 'P'

# Letters recorded as not handled; the generation code in this file does not
# read this list.
unhandled = ['æ', 'ø', 'ß']
def build_dictionary(character_sets, letter_strokes, uppercase_fragment):
    """Build the stroke -> translation mapping for accented letters.

    Args:
        character_sets: Iterable of rows shaped like
            ``[diacritic_stroke, combining_unicode, *accented_letters]``.
        letter_strokes: Mapping from an ASCII base letter to its
            fingerspelling stroke.
        uppercase_fragment: Text appended to a fingerspelling stroke to get
            the stroke for the uppercase letter.

    Returns:
        A dict, in insertion order, containing for every row the bare
        diacritic stroke mapped to ``{&<combining mark>}``, followed by a
        lowercase entry (``{>}{&<letter>}``) and an uppercase entry
        (``{&<LETTER>}``) for each accented letter.

    Raises:
        KeyError: If an accented letter's ASCII base letter is missing from
            ``letter_strokes`` (this includes letters that have no ASCII
            decomposition at all, whose base comes out as ``''``).
    """
    entries = {}
    # Unpack into fresh local names: the original unpacked into `characters`,
    # which rebound the module-level list while it was being iterated.
    for diacritic_stroke, combining_unicode, *accented_letters in character_sets:
        entries[diacritic_stroke] = f'{{&{combining_unicode}}}'
        for character in accented_letters:
            # é → e: decompose, then drop everything that is not ASCII.
            letter = (
                unicodedata.normalize('NFKD', character)
                .encode('ASCII', 'ignore')
                .decode('ASCII')
            )
            # e → *E
            try:
                fingerspelling_stroke = letter_strokes[letter]
            except KeyError:
                raise KeyError(
                    f'no fingerspelling stroke for {character!r} '
                    f'(base letter {letter!r})'
                ) from None
            # É
            uppercase_character = character.upper()
            # *E → *EP
            uppercase_stroke = fingerspelling_stroke + uppercase_fragment
            entries[f'{fingerspelling_stroke}/{diacritic_stroke}'] = (
                f'{{>}}{{&{character}}}'
            )
            entries[f'{uppercase_stroke}/{diacritic_stroke}'] = (
                f'{{&{uppercase_character}}}'
            )
    return entries


if __name__ == '__main__':
    the_dict = build_dictionary(
        characters,
        fingerspelling,
        uppercase_fingerspelling_stroke_fragment,
    )
    # ensure_ascii=False keeps the accented letters readable in the output.
    print(json.dumps(the_dict, ensure_ascii=False, indent=4))
{
"KRAO*UT": "{&́}",
"A*/KRAO*UT": "{>}{&á}",
"A*P/KRAO*UT": "{&Á}",
"*E/KRAO*UT": "{>}{&é}",
"*EP/KRAO*UT": "{&É}",
"*EU/KRAO*UT": "{>}{&í}",
"*EUP/KRAO*UT": "{&Í}",
"O*/KRAO*UT": "{>}{&ó}",
"O*P/KRAO*UT": "{&Ó}",
"*U/KRAO*UT": "{>}{&ú}",
"*UP/KRAO*UT": "{&Ú}",
"TKPWRA*F": "{&̀}",
"A*/TKPWRA*F": "{>}{&à}",
"A*P/TKPWRA*F": "{&À}",
"*E/TKPWRA*F": "{>}{&è}",
"*EP/TKPWRA*F": "{&È}",
"*EU/TKPWRA*F": "{>}{&ì}",
"*EUP/TKPWRA*F": "{&Ì}",
"O*/TKPWRA*F": "{>}{&ò}",
"O*P/TKPWRA*F": "{&Ò}",
"*U/TKPWRA*F": "{>}{&ù}",
"*UP/TKPWRA*F": "{&Ù}",
"KREURBG": "{&̂}",
"A*/KREURBG": "{>}{&â}",
"A*P/KREURBG": "{&Â}",
"*E/KREURBG": "{>}{&ê}",
"*EP/KREURBG": "{&Ê}",
"*EU/KREURBG": "{>}{&î}",
"*EUP/KREURBG": "{&Î}",
"O*/KREURBG": "{>}{&ô}",
"O*P/KREURBG": "{&Ô}",
"*U/KREURBG": "{>}{&û}",
"*UP/KREURBG": "{&Û}",
"*UPLT": "{&̈}",
"A*/*UPLT": "{>}{&ä}",
"A*P/*UPLT": "{&Ä}",
"*E/*UPLT": "{>}{&ë}",
"*EP/*UPLT": "{&Ë}",
"*EU/*UPLT": "{>}{&ï}",
"*EUP/*UPLT": "{&Ï}",
"O*/*UPLT": "{>}{&ö}",
"O*P/*UPLT": "{&Ö}",
"*U/*UPLT": "{>}{&ü}",
"*UP/*UPLT": "{&Ü}",
"*LTD": "{&̃}",
"A*/*LTD": "{>}{&ã}",
"A*P/*LTD": "{&Ã}",
"TPH*/*LTD": "{>}{&ñ}",
"TPH*P/*LTD": "{&Ñ}",
"O*/*LTD": "{>}{&õ}",
"O*P/*LTD": "{&Õ}",
"*RNG": "{&̊}",
"A*/*RNG": "{>}{&å}",
"A*P/*RNG": "{&Å}",
"STKEUL": "{&̧}",
"KR*/STKEUL": "{>}{&ç}",
"KR*P/STKEUL": "{&Ç}"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment