# lynn/toaq-steno.py Secret

## toaq-steno.py
import itertools
import re
import unicodedata

#
#  S T P H * a e m 3
#  S K W R * o u q 2 1
#      + O   E U
#

# Onset chord (the S/T/K/P/W/H/R keys) -> initial consonant of the syllable.
# The empty chord stands for a syllable with no initial consonant.
onsets = {
    "": "",
    "PW": "b", "KR": "c", "KH": "ch", "TK": "d", "TP": "f",
    "TKPW": "g", "H": "h", "SKWR": "j", "K": "k", "HR": "l",
    "PH": "m", "TPH": "n", "KWR": "nh", "P": "p", "R": "r",
    "S": "s", "SH": "sh", "T": "t", "W": "ꝡ", "KW": "z",
}

# Vowel chord (what is left of the O/E/U keys once to_toaq has removed
# "A", "-" and "*") -> main vowel.  No vowel key at all means "a".
vowels = {"": "a", "E": "e", "EU": "ı", "O": "o", "U": "u"}

# Tail chord (the F/R/P/B keys) -> vowel(s) appended after the main vowel.
tails = {
    "": "",
    "F": "a", "R": "o", "P": "e", "B": "u",
    "FR": "ao", "PB": "ı", "FP": "eı", "RB": "oı",
    "FRPB": "aı",
}

# Every rime (syllable minus its leading consonants) that to_toaq accepts;
# a stroke whose rime is not listed here is rejected.
valid_rimes = " ".join(
    (
        "a u ı o e aq uq ıq oq eq am um ım om em",
        "aı ao oı eı",
        "ua ıa oa ea uaq ıaq oaq uam ıam oam eam",
        "ıu eu ıuq euq ıum eum uı uıq uım",
        "uo ıo eo uoq ıoq eoq uom ıom eom",
        "ue ıe oe ueq ıeq oeq uem ıem oem",
        "uaı uao uoı ueı",
        "ıaı ıao ıoı ıeı",
        "oaı oao oeı",
        "eaı eao eoı",
    )
).split()


def in_tone(s, num):
    """Return *s* re-toned with tone number *num*.

    Any grave, acute, diaeresis or circumflex mark already present in *s* is
    dropped, then the mark for *num* is attached to the first vowel (matched
    case-insensitively).

    Args:
        s: Text to re-tone.
        num: Index into ``("", grave, acute, diaeresis, circumflex)``; ``0``
            attaches nothing.

    Returns:
        The NFKC-normalised result.  When *num* is greater than 1, a dotless
        ``ı`` receiving the mark is written as a plain ``i``.
    """
    stripped = re.sub(
        "[\u0300\u0301\u0308\u0302]", "", unicodedata.normalize("NFKD", s)
    )
    mark = ("", "\u0300", "\u0301", "\u0308", "\u0302")[num]

    def attach(match):
        letter = match[0]
        if num > 1:
            letter = letter.replace("ı", "i")
        return letter + mark

    retoned = re.sub("[aeiıou]", attach, stripped, count=1, flags=re.I)
    return unicodedata.normalize("NFKC", retoned)


def underdot(s):
    """Return *s* with a dot below (U+0323) on its first vowel.

    Any dot below already present in *s* is removed first.  The new dot is
    inserted after the first vowel (matched case-insensitively) and after the
    grave/acute/diaeresis/circumflex mark that directly follows it, if any.
    The result is NFKC-normalised; text without a vowel comes back unchanged
    apart from normalisation.
    """
    decomposed = unicodedata.normalize("NFKD", s).replace("\u0323", "")
    first_vowel = re.compile("[aeiıou][\u0300\u0301\u0308\u0302]?", re.I)
    dotted = first_vowel.sub(lambda hit: hit[0] + "\u0323", decomposed, count=1)
    return unicodedata.normalize("NFKC", dotted)


def to_toaq(stroke):
    """Translate a single steno stroke into a Toaq syllable.

    The stroke is split, in steno order, into five chords: onset
    (``STKPWHR``), vowel (``AO-*EU``), tail (``FRPB``), final nasal (``LG``)
    and tone (``TSDZ``).  An ``A`` in the vowel chord marks a continuation
    syllable: the result is prefixed with ``{^}`` and, when there is no
    onset, with an apostrophe as well.

    Args:
        stroke: The stroke text, e.g. ``"KEU"`` (which yields ``"kı"``).

    Returns:
        The syllable, with the tone mark (if any) applied by ``in_tone``.

    Raises:
        KeyError: If the stroke contains ``*``, is empty or one of the
            reserved strokes ``-S``/``-SZ``/``-Z``, is not made up entirely
            of the chords above, uses a chord missing from the lookup
            tables, produces a rime outside ``valid_rimes``, or carries an
            unsupported tone chord.
    """
    if "*" in stroke or stroke in ("", "-S", "-SZ", "-Z"):
        raise KeyError(stroke)

    # fullmatch, not search: every part of the pattern is optional, so
    # search() "matches" the empty prefix of any unknown stroke (e.g. "#")
    # and would translate it to "a".
    m = re.fullmatch(
        "(S?T?K?P?W?H?R?)(A?O?[-*]?E?U?)(F?R?P?B?)(L?G?)(T?S?D?Z?)", stroke
    )
    if m is None:
        raise KeyError(stroke)
    onset, vowel, tail, extra, tone = m.groups()

    cont = "A" in vowel
    vowel = re.sub("[-*A]", "", vowel)

    if onset == "" and vowel == "" and tail == "F":
        raku = "a"
    elif vowel == "" and (len(tail) == 1 or tail == "PB"):
        # Something like "Ke" instead of "KE".
        raise KeyError(stroke)
    else:
        # But we do want to write CF syllables as "Kao, Kae, Kaoeu" etc:
        v = "" if vowel == "" and tail else vowels[vowel]
        raku = onsets[onset] + v + tails[tail]

    # `extra` can only contain L and/or G; G wins when both are present.
    if "G" in extra:
        raku += "q"
    elif "L" in extra:
        raku += "m"
    if raku.lstrip("bcdfghjklmnprstꝡz'") not in valid_rimes:
        raise KeyError(stroke)

    if tone:
        tone_numbers = {"Z": 1, "S": 2, "T": 3, "TS": 4}
        if tone not in tone_numbers:
            # Anything with D, or a combination such as "TZ".
            raise KeyError(stroke)
        raku = in_tone(raku, tone_numbers[tone])

    glue = "{^}" if cont else ""
    oaomo = "'" if cont and onset == "" else ""
    return glue + oaomo + raku


# Maximum number of strokes in a key handed to lookup().
# NOTE(review): nothing in this file reads it; presumably it is consumed by
# the steno engine that loads this module as a dictionary -- confirm.
LONGEST_KEY = 4


def lookup(key):
    """Translate a sequence of strokes into a Toaq word.

    The first stroke is translated with ``to_toaq``.  Every later stroke
    either modifies the word built so far or extends it:

    * ``-S`` / ``-T`` / ``-TS`` re-tone the word with tone 2 / 3 / 4;
    * ``-B`` puts an underdot on the word and appends ``{^}``;
    * any other stroke containing ``A`` is translated with ``to_toaq`` and
      appended.

    Args:
        key: Sequence of stroke strings.

    Returns:
        The translated word.

    Raises:
        KeyError: If *key* is empty, a follow-up stroke is none of the
            above, or ``to_toaq`` rejects a stroke.
    """
    if not key:
        # An empty key used to escape as IndexError; every other failure
        # here is a KeyError, so report this one the same way.
        raise KeyError(key)

    retone = {"-S": 2, "-T": 3, "-TS": 4}
    word = to_toaq(key[0])
    for k in key[1:]:
        if k in retone:
            word = in_tone(word, retone[k])
        elif k == "-B":
            word = underdot(word) + "{^}"
        elif "A" in k:
            word += to_toaq(k)
        else:
            raise KeyError(key)
    return word


def subsequences(s):
    """Yield every subsequence of *s* joined into a string, shortest first.

    Elements keep the relative order they have in *s*.  Subsequences of the
    same length come out in ``itertools.combinations`` order, so the empty
    string is produced first and the whole of *s* last.
    """
    for size in range(len(s) + 1):
        yield from ("".join(picked) for picked in itertools.combinations(s, size))

# Script entry point: enumerate every chord combination in steno order and
# write the strokes that to_toaq accepts to toaq.json as a stroke -> text map.
if __name__ == "__main__":
    import json

    d = {}
    for left in subsequences("STKPWHR"):
        for v in subsequences("AO*EU"):
            # A stroke with no vowel key is written with "-" in the middle.
            middle = v or "-"
            for right in subsequences("FRPBLGTSZ"):
                # A trailing "-" (nothing on the right) is dropped.
                stroke = (left + middle + right).rstrip("-")
                if stroke in d:
                    print("Duplicate", stroke)
                try:
                    d[stroke] = to_toaq(stroke)
                except KeyError:
                    # Not a valid syllable stroke; leave it out.
                    pass
    # ensure_ascii=False writes ı, ꝡ and the tone marks verbatim, so the file
    # must be opened as UTF-8 rather than the platform default encoding.
    with open("toaq.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(d, ensure_ascii=False, indent=0))
    print("Done!", len(d))
	import itertools
	import re
	import unicodedata

	#
	#  S T P H * a e m 3
	#  S K W R * o u q 2 1
	#      + O   E U
	#

	# Onset chord (the S/T/K/P/W/H/R keys) -> initial consonant of the syllable.
	# The empty chord stands for a syllable with no initial consonant.
	onsets = {
	    "": "",
	    "PW": "b", "KR": "c", "KH": "ch", "TK": "d", "TP": "f",
	    "TKPW": "g", "H": "h", "SKWR": "j", "K": "k", "HR": "l",
	    "PH": "m", "TPH": "n", "KWR": "nh", "P": "p", "R": "r",
	    "S": "s", "SH": "sh", "T": "t", "W": "ꝡ", "KW": "z",
	}

	# Vowel chord (what is left of the O/E/U keys once to_toaq has removed
	# "A", "-" and "*") -> main vowel.  No vowel key at all means "a".
	vowels = {"": "a", "E": "e", "EU": "ı", "O": "o", "U": "u"}

	# Tail chord (the F/R/P/B keys) -> vowel(s) appended after the main vowel.
	tails = {
	    "": "",
	    "F": "a", "R": "o", "P": "e", "B": "u",
	    "FR": "ao", "PB": "ı", "FP": "eı", "RB": "oı",
	    "FRPB": "aı",
	}

	# Every rime (syllable minus its leading consonants) that to_toaq accepts;
	# a stroke whose rime is not listed here is rejected.
	valid_rimes = " ".join(
	    (
	        "a u ı o e aq uq ıq oq eq am um ım om em",
	        "aı ao oı eı",
	        "ua ıa oa ea uaq ıaq oaq uam ıam oam eam",
	        "ıu eu ıuq euq ıum eum uı uıq uım",
	        "uo ıo eo uoq ıoq eoq uom ıom eom",
	        "ue ıe oe ueq ıeq oeq uem ıem oem",
	        "uaı uao uoı ueı",
	        "ıaı ıao ıoı ıeı",
	        "oaı oao oeı",
	        "eaı eao eoı",
	    )
	).split()


	def in_tone(s, num):
	    """Return *s* re-toned with tone number *num*.

	    Any grave, acute, diaeresis or circumflex mark already present in *s* is
	    dropped, then the mark for *num* is attached to the first vowel (matched
	    case-insensitively).

	    Args:
	        s: Text to re-tone.
	        num: Index into ``("", grave, acute, diaeresis, circumflex)``; ``0``
	            attaches nothing.

	    Returns:
	        The NFKC-normalised result.  When *num* is greater than 1, a dotless
	        ``ı`` receiving the mark is written as a plain ``i``.
	    """
	    s = unicodedata.normalize("NFKD", s)
	    s = re.sub("[\u0300\u0301\u0308\u0302]", "", s)
	    d = ["", "\u0300", "\u0301", "\u0308", "\u0302"][num]
	    return unicodedata.normalize(
	        "NFKC",
	        re.sub(
	            "[aeiıou]",
	            lambda m: m[0].replace("ı", "i" if num > 1 else "ı") + d,
	            s,
	            count=1,
	            flags=re.I,
	        ),
	    )


	def underdot(s):
	    """Return *s* with a dot below (U+0323) on its first vowel.

	    Any dot below already present in *s* is removed first.  The new dot is
	    inserted after the first vowel (matched case-insensitively) and after the
	    grave/acute/diaeresis/circumflex mark that directly follows it, if any.
	    The result is NFKC-normalised; text without a vowel comes back unchanged
	    apart from normalisation.
	    """
	    s = unicodedata.normalize("NFKD", s)
	    s = re.sub("\u0323", "", s)
	    return unicodedata.normalize(
	        "NFKC",
	        re.sub(
	            "[aeiıou][\u0300\u0301\u0308\u0302]?",
	            lambda m: m[0] + "\u0323",
	            s,
	            count=1,
	            flags=re.I,
	        ),
	    )



	def to_toaq(stroke):
	    """Translate a single steno stroke into a Toaq syllable.

	    The stroke is split, in steno order, into five chords: onset
	    (``STKPWHR``), vowel (``AO-*EU``), tail (``FRPB``), final nasal (``LG``)
	    and tone (``TSDZ``).  An ``A`` in the vowel chord marks a continuation
	    syllable: the result is prefixed with ``{^}`` and, when there is no
	    onset, with an apostrophe as well.

	    Args:
	        stroke: The stroke text, e.g. ``"KEU"`` (which yields ``"kı"``).

	    Returns:
	        The syllable, with the tone mark (if any) applied by ``in_tone``.

	    Raises:
	        KeyError: If the stroke contains ``*``, is empty or one of the
	            reserved strokes ``-S``/``-SZ``/``-Z``, is not made up entirely
	            of the chords above, uses a chord missing from the lookup
	            tables, produces a rime outside ``valid_rimes``, or carries an
	            unsupported tone chord.
	    """
	    if "*" in stroke or stroke in ("", "-S", "-SZ", "-Z"):
	        raise KeyError(stroke)

	    # fullmatch, not search: every part of the pattern is optional, so
	    # search() "matches" the empty prefix of any unknown stroke (e.g. "#")
	    # and would translate it to "a".
	    m = re.fullmatch(
	        "(S?T?K?P?W?H?R?)(A?O?[-*]?E?U?)(F?R?P?B?)(L?G?)(T?S?D?Z?)", stroke
	    )
	    if m is None:
	        raise KeyError(stroke)
	    onset, vowel, tail, extra, tone = m.groups()

	    cont = "A" in vowel
	    vowel = re.sub("[-*A]", "", vowel)

	    if onset == "" and vowel == "" and tail == "F":
	        raku = "a"
	    elif vowel == "" and (len(tail) == 1 or tail == "PB"):
	        # Something like "Ke" instead of "KE".
	        raise KeyError(stroke)
	    else:
	        # But we do want to write CF syllables as "Kao, Kae, Kaoeu" etc:
	        v = "" if vowel == "" and tail else vowels[vowel]
	        raku = onsets[onset] + v + tails[tail]

	    # `extra` can only contain L and/or G; G wins when both are present.
	    if "G" in extra:
	        raku += "q"
	    elif "L" in extra:
	        raku += "m"
	    if raku.lstrip("bcdfghjklmnprstꝡz'") not in valid_rimes:
	        raise KeyError(stroke)

	    if tone:
	        tone_numbers = {"Z": 1, "S": 2, "T": 3, "TS": 4}
	        if tone not in tone_numbers:
	            # Anything with D, or a combination such as "TZ".
	            raise KeyError(stroke)
	        raku = in_tone(raku, tone_numbers[tone])

	    glue = "{^}" if cont else ""
	    oaomo = "'" if cont and onset == "" else ""
	    return glue + oaomo + raku


	# Maximum number of strokes in a key handed to lookup().
	# NOTE(review): nothing in this file reads it; presumably it is consumed by
	# the steno engine that loads this module as a dictionary -- confirm.
	LONGEST_KEY = 4


	def lookup(key):
	    """Translate a sequence of strokes into a Toaq word.

	    The first stroke is translated with ``to_toaq``.  Every later stroke
	    either modifies the word built so far or extends it:

	    * ``-S`` / ``-T`` / ``-TS`` re-tone the word with tone 2 / 3 / 4;
	    * ``-B`` puts an underdot on the word and appends ``{^}``;
	    * any other stroke containing ``A`` is translated with ``to_toaq`` and
	      appended.

	    Args:
	        key: Sequence of stroke strings.

	    Returns:
	        The translated word.

	    Raises:
	        KeyError: If *key* is empty, a follow-up stroke is none of the
	            above, or ``to_toaq`` rejects a stroke.
	    """
	    if not key:
	        # An empty key used to escape as IndexError; every other failure
	        # here is a KeyError, so report this one the same way.
	        raise KeyError(key)

	    retone = {"-S": 2, "-T": 3, "-TS": 4}
	    word = to_toaq(key[0])
	    for k in key[1:]:
	        if k in retone:
	            word = in_tone(word, retone[k])
	        elif k == "-B":
	            word = underdot(word) + "{^}"
	        elif "A" in k:
	            word += to_toaq(k)
	        else:
	            raise KeyError(key)
	    return word


	def subsequences(s):
	    """Yield every subsequence of *s* joined into a string, shortest first.

	    Elements keep the relative order they have in *s*.  Subsequences of the
	    same length come out in ``itertools.combinations`` order, so the empty
	    string is produced first and the whole of *s* last.
	    """
	    for k in range(0, len(s) + 1):
	        for c in itertools.combinations(s, k):
	            yield "".join(c)

	# Script entry point: enumerate every chord combination in steno order and
	# write the strokes that to_toaq accepts to toaq.json as a stroke -> text map.
	if __name__ == "__main__":
	    import json

	    d = {}
	    for left in subsequences("STKPWHR"):
	        for v in subsequences("AO*EU"):
	            # A stroke with no vowel key is written with "-" in the middle.
	            middle = v or "-"
	            for right in subsequences("FRPBLGTSZ"):
	                # A trailing "-" (nothing on the right) is dropped.
	                stroke = (left + middle + right).rstrip("-")
	                if stroke in d:
	                    print("Duplicate", stroke)
	                try:
	                    d[stroke] = to_toaq(stroke)
	                except KeyError:
	                    # Not a valid syllable stroke; leave it out.
	                    pass
	    # ensure_ascii=False writes ı, ꝡ and the tone marks verbatim, so the file
	    # must be opened as UTF-8 rather than the platform default encoding.
	    with open("toaq.json", "w", encoding="utf-8") as f:
	        f.write(json.dumps(d, ensure_ascii=False, indent=0))
	    print("Done!", len(d))