# multiword_briefs.py
import re
# Longest outline (in strokes) this module handles; lookup() only ever reads
# key[0], i.e. a single stroke.
# NOTE(review): presumably the LONGEST_KEY convention of a Plover Python
# dictionary -- confirm against the host application.
LONGEST_KEY = 1

# Splits one stroke into the four parts that do_lookup() translates
# independently.  Every key is optional, so each named group may be empty:
#   lhs     - leading keys (looked up in PREFIXES)
#   middle  - "*", "-" or nothing
#   subject - the E/U keys (looked up in SUBJECTS)
#   rhs     - trailing keys (looked up in SUFFIXES after a marker is prepended)
PARTS_RE = re.compile(r"""
    ^
    (?P<lhs> S? T? K? P? W? H? R?
             A? O?)
    (?P<middle> (\* | - | ))
    (?P<subject> (E? U?))
    (?P<rhs> F? R? P? B? L? G? T? S? D? Z?)
    $
""", re.VERBOSE)
# Strokes that do_lookup() refuses to translate even though they would parse
# into a phrase; each comment records the translation that is preferred
# for that stroke over the phrase this module would have produced.
BLACKLIST = {
    "EUPL",  # prefer "{im^}" over "I'm"
    "*EUF",  # prefer "{^ive}" over "I've"
    "THAPBT",  # prefer "than the" over "that don't"
    "TKOPBLG",  # prefer "dodge" over "do know"
    "KEUPBG",  # prefer "king" over "can I think"
    "KWROUPBG",  # prefer "young" over "I don't you think"
    "KWRAO*URL",  # prefer "URL" over "you don't you really"
    "EUFT",  # prefer "{^ist}" over "I have the"
    "TKUB",  # prefer "dub" over "do you be"
}

# Import-time sanity check: an entry that PARTS_RE cannot parse could never be
# produced by do_lookup() anyway, so it would indicate a typo in the set above.
assert all(PARTS_RE.match(i) for i in BLACKLIST), \
    "BLACKLIST contains a stroke that PARTS_RE cannot parse"
# Maps the "subject" part of a stroke (the E/U keys matched by PARTS_RE) to
# the pronoun it contributes.  The empty string means no subject key was
# pressed and maps to None.
SUBJECTS = {
    "": None,
    "U": "you",
    "EU": "I",
}
# Maps the right-hand part of a stroke to the word(s) it contributes.
#
# Keys always start with a marker: "*" when the stroke is starred, "-"
# otherwise (do_lookup() prepends "-" when the stroke has no marker of its own).
#
# A value is either a plain string, or a tuple ``(text, allowed_subjects)``
# where ``allowed_subjects`` lists the SUBJECTS values (None = no subject) the
# suffix may be combined with.  Text starting with an apostrophe is a
# contraction that do_lookup() glues onto the preceding word.
SUFFIXES = {
    "*D": "'d",
    "*L": "'ll",
    "*PL": ("'m", ["I"]),
    "*R": ("'re", [None, "you"]),
    "*F": "'ve",
    "-R": ("are", [None, "you"]),
    "-B": "be",
    "-BG": "can",
    "-BGT": "can't",
    "-BGD": "could",
    "*BGT": "couldn't",
    "*PBT": "didn't",
    "-PBT": "don't",
    "*FPB": "even",
    "*FR": "ever",
    "-RBGT": "get",
    "-RBGTS": "get to",
    "*RBGT": "got",
    "*RBGTS": "got to",
    "-D": "had",
    "-F": "have",
    "-FT": "have the",
    "-FTS": "have to",
    "-FPBT": "haven't",
    "-PBLG": "know",
    "*PBLG": "knew",
    "*PBL": "mean",
    "*PBLTS": "mean to",
    "-PLT": "might",
    "*PBD": "need",
    "-PBL": "only",
    "*RL": "really",
    "-RBD": "should",
    "*RBT": "shouldn't",
    "-PBG": "think",
    "-RPBD": "understand",
    "*RPBD": "understood",
    "-PT": "want",
    "-PTS": "want to",
    "-FS": ("was", [None, "I"]),
    "*FBT": ("wasn't", [None, "I"]),
    "-RP": "were",
    "-RPTS": "were to",
    "*RPT": "weren't",
    "-L": "will",
    "-FBT": "won't",
    "-LD": "would",
    "*LT": "wouldn't",
}
# Suffix words grouped by role; used below to restrict which suffixes may
# follow an auxiliary-verb prefix.
ADVERB = {"ever", "even", "only", "really"}
PRESENT_NON_AUX = {"be", "get", "go", "have", "know", "mean", "need", "recall",
                   "think", "understand", "want"}

# Maps the left-hand part of a stroke to the word(s) it contributes.
#
# A value is either a plain string, or a tuple ``(text, allowed_suffixes)``
# where ``allowed_suffixes`` is the set of suffix *texts* (SUFFIXES values)
# that do_lookup() accepts after this prefix.
PREFIXES = {
    "SKP": "and",
    "SKPO": "and I don't",
    "SKPAO": "and you don't",
    "STKP": "and if",
    "STKPO": "and if I don't",
    "STKPAO": "and if you don't",
    "K": ("can", ADVERB | PRESENT_NON_AUX),
    "KO": ("could", ADVERB | PRESENT_NON_AUX),
    "TK": ("did", ADVERB | PRESENT_NON_AUX),
    "TKO": ("do", ADVERB | PRESENT_NON_AUX),
    "SR": ("have", ADVERB | PRESENT_NON_AUX | {"got", "had"}),
    "KWRO": ("I don't", ADVERB | PRESENT_NON_AUX),
    "KWRAO": ("you don't", ADVERB | PRESENT_NON_AUX),
    "STP": "if",
    "STPO": "if I don't",
    "STPAO": "if you don't",
    "STHA": "is that",
    # NOTE(review): "SWHA" used to be listed here as "is what" as well as
    # further down as "so what".  A dict literal keeps only the last value for
    # a repeated key, so "is what" was never reachable; the dead entry is
    # removed to make the effective mapping explicit.  If "is what" is wanted
    # it needs a different stroke.
    "STHO": "is that I don't",
    "STHAO": "is that you don't",
    "SHO": ("should", ADVERB | PRESENT_NON_AUX),
    "SW": "so",
    "SWO": "so I don't",
    "SWAO": "so you don't",
    "STPW": "so if",
    "STPWO": "so if I don't",
    "STPWAO": "so if you don't",
    "STWHA": "so that",
    "STWHO": "so that I don't",
    "STWHAO": "so that you don't",
    "SWHA": "so what",
    "THA": "that",
    "THAO": "that you don't",
    "THO": "that I don't",
    "WHA": "what",
    "WHO": "what I don't",
    "WHAO": "what you don't",
    "WO": ("would", ADVERB | PRESENT_NON_AUX),
}
def lookup(key):
    """Translate an outline into a multiword phrase.

    Args:
        key: Sequence of strokes; only the first element is consulted.

    Returns:
        The phrase do_lookup() builds for that stroke.

    Raises:
        KeyError: do_lookup() produced no translation for the stroke.
        RuntimeError: do_lookup() itself raised an exception.
    """
    stroke = key[0]
    try:
        translation = do_lookup(stroke)
    except Exception as e:
        # Re-raise as RuntimeError so that a KeyError caused by a programming
        # mistake inside do_lookup() is not mistaken for "no translation".
        raise RuntimeError("couldn't translate multiword brief") from e
    if not translation:
        # Include the key so the failure is identifiable in tracebacks.
        raise KeyError(key)
    return translation
def do_lookup(stroke):
    """Build the phrase for a single stroke, or return None if there is none.

    The stroke is split by PARTS_RE into a prefix, an optional "*"/"-" marker,
    a subject and a suffix, which are translated through PREFIXES, SUBJECTS and
    SUFFIXES respectively and joined with spaces.  For example "THAEURBD"
    yields "that I should" and "THA*EUF" yields "that I've".

    Args:
        stroke: The stroke as a string, e.g. "THA*EUF".

    Returns:
        The phrase as a string, or None when the stroke is blacklisted, does
        not parse, uses an unknown part, or combines parts that the tables do
        not allow together.
    """
    if stroke in BLACKLIST:
        return None

    match = PARTS_RE.match(stroke)
    if match is None:
        return None
    lhs = match.group("lhs")
    middle = match.group("middle")
    subject = match.group("subject")
    rhs = match.group("rhs")

    # Sanity check on the table: subject keys never contain a marker character.
    assert not any("*" in i or "-" in i for i in SUBJECTS)

    # SUFFIXES keys always start with a marker; strokes without one use "-".
    # Consequently ``rhs`` is never empty from here on.
    rhs = (middle or "-") + rhs

    lhs_value = PREFIXES.get(lhs)
    subject_value = SUBJECTS.get(subject)
    rhs_value = SUFFIXES.get(rhs)

    # Tuple entries carry a restriction next to their text.
    if isinstance(rhs_value, tuple):
        rhs_value, allowed_subjects = rhs_value
        if subject_value not in allowed_subjects:
            return None
    if isinstance(lhs_value, tuple):
        lhs_value, allowed_suffixes = lhs_value
        if rhs_value and rhs_value not in allowed_suffixes:
            return None

    # Keys were pressed for a part, but the table has no entry for them.
    if lhs and not lhs_value:
        return None
    if subject and not subject_value:
        return None
    # ``rhs`` always holds at least the marker, so this rejects every stroke
    # whose suffix is missing or unknown: a suffix is effectively mandatory.
    if not rhs_value:
        return None

    # A phrase needs at least two of the three parts.
    present = sum(
        value is not None
        for value in (lhs_value, subject_value, rhs_value)
    )
    if present < 2:
        return None

    # Bit of a hack to support multiple-word left-hand sides: the prefixes of
    # three or more words in PREFIXES already name "I"/"you", so a separate
    # subject key is refused for them.
    if lhs_value and len(lhs_value.split()) >= 3 and subject:
        return None

    words = [part for part in (lhs_value, subject_value) if part]
    if rhs_value.startswith("'"):
        # Contractions ("'ve", "'ll", ...) attach to the preceding word.
        # NOTE(review): this also applies after prefixes ending in "don't"
        # (e.g. "SKPO*D" gives "and I don't'd") -- confirm whether intended.
        assert words
        words[-1] += rhs_value
    else:
        words.append(rhs_value)
    return " ".join(words)
def test_do_lookup():
    """Check do_lookup() against a table of (stroke, expected phrase) pairs.

    An expected value of None means the stroke must not be translated.
    """
    tests = [
        ("THARBD", "that should"),
        ("THAEURBD", "that I should"),
        ("EULD", "I would"),
        ("THA*EUF", "that I've"),
        ("THRAEUGS", None),
        ("SR*U", None),
        ("SRUB", "have you be"),
        ("SR*UFR", "have you ever"),
        ("EUBG", "I can"),
        ("UR", "you are"),
        ("TKUPBT", None),
        ("SRURBD", None),
        ("STKPOPT", "and if I don't want"),
        ("STWHAOF", "so that you don't have"),
        ("WHAOEUL", None),
        ("SWOPBLG", "so I don't know"),
        ("STWHAOFTS", "so that you don't have to"),
    ]
    # ``stroke`` rather than ``input`` so the builtin is not shadowed.
    for stroke, expected in tests:
        output = do_lookup(stroke)
        assert output == expected, (
            f"do_lookup({stroke!r}) returned {output!r}, expected {expected!r}"
        )
def reverse_lookup(text):
    """Return the strokes that produce *text*; always an empty list.

    No reverse mapping is implemented, so every query reports no strokes.
    """
    return []
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.