-
-
Save arxanas/ab337490bf204ed81b5e9d7d3417fa8a to your computer and use it in GitHub Desktop.
multiword_briefs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
# Maximum number of strokes in a key handled by this dictionary.  lookup()
# only ever reads key[0], so every brief here is a single stroke.
# NOTE(review): presumably consumed by the host steno engine's
# Python-dictionary loader -- confirm against that loader's contract.
LONGEST_KEY = 1
# Splits a single stroke into four named parts:
#   lhs     -- left-hand keys (S T K P W H R) followed by the vowels A O
#   middle  -- an optional "*" or "-" marker (may be empty)
#   subject -- the vowels E U
#   rhs     -- right-hand keys (F R P B L G T S D Z)
# Every key is optional but must appear in this order; anything else makes
# the whole match fail.
PARTS_RE = re.compile(r"""
    ^
    (?P<lhs> S? T? K? P? W? H? R?
             A? O?)
    (?P<middle> (\* | - | ))
    (?P<subject> (E? U?))
    (?P<rhs> F? R? P? B? L? G? T? S? D? Z?)
    $
""", re.VERBOSE)
# Strokes that would otherwise be generated as multiword briefs but are
# deliberately left untranslated here (do_lookup returns None for them) so
# that another, preferred translation can be used instead.
BLACKLIST = {
    "EUPL",  # prefer "{im^}" over "I'm"
    "*EUF",  # prefer "{^ive}" over "I've"
    "THAPBT",  # prefer "than the" over "that don't"
    "TKOPBLG",  # prefer "dodge" over "do know"
    "KEUPBG",  # prefer "king" over "can I think"
    "KWROUPBG",  # prefer "young" over "I don't you think"
    "KWRAO*URL",  # prefer "URL" over "you don't you really"
    "EUFT",  # prefer "{^ist}" over "I have the"
    "TKUB",  # prefer "dub" over "do you be"
}
# Import-time sanity check: a blacklisted stroke that PARTS_RE cannot parse
# could never have been produced anyway, so it is most likely a typo.
assert all(PARTS_RE.match(i) for i in BLACKLIST), \
    "every BLACKLIST entry must be a stroke that PARTS_RE can parse"
# Maps the "subject" part of a stroke (the E/U vowels) to the pronoun it
# contributes.  The empty string maps to None, meaning "no subject word".
SUBJECTS = {
    "": None,
    "U": "you",
    "EU": "I",
}
# Maps the right-hand part of a stroke to the word(s) it contributes.
#
# Keys always start with "*" or "-": do_lookup prepends the stroke's middle
# marker (or "-" when there is none) to the right-hand keys before looking
# them up here.
#
# A value is either
#   * a plain string, usable with any subject, or
#   * a (word, allowed_subjects) tuple, in which case do_lookup rejects the
#     stroke unless the subject word (None, "you" or "I") is in the list.
#
# Values beginning with an apostrophe are contractions; do_lookup glues them
# onto the preceding word instead of adding a separate word.
SUFFIXES = {
    "*D": "'d",
    "*L": "'ll",
    "*PL": ("'m", ["I"]),
    "*R": ("'re", [None, "you"]),
    "*F": "'ve",
    "-R": ("are", [None, "you"]),
    "-B": "be",
    "-BG": "can",
    "-BGT": "can't",
    "-BGD": "could",
    "*BGT": "couldn't",
    "*PBT": "didn't",
    "-PBT": "don't",
    "*FPB": "even",
    "*FR": "ever",
    "-RBGT": "get",
    "-RBGTS": "get to",
    "*RBGT": "got",
    "*RBGTS": "got to",
    "-D": "had",
    "-F": "have",
    "-FT": "have the",
    "-FTS": "have to",
    "-FPBT": "haven't",
    "-PBLG": "know",
    "*PBLG": "knew",
    "*PBL": "mean",
    "*PBLTS": "mean to",
    "-PLT": "might",
    "*PBD": "need",
    "-PBL": "only",
    "*RL": "really",
    "-RBD": "should",
    "*RBT": "shouldn't",
    "-PBG": "think",
    "-RPBD": "understand",
    "*RPBD": "understood",
    "-PT": "want",
    "-PTS": "want to",
    "-FS": ("was", [None, "I"]),
    "*FBT": ("wasn't", [None, "I"]),
    "-RP": "were",
    "-RPTS": "were to",
    "*RPT": "weren't",
    "-L": "will",
    "-FBT": "won't",
    "-LD": "would",
    "*LT": "wouldn't",
}
# Word groups used to build the "allowed suffix" sets attached to some
# PREFIXES entries (e.g. the prefix "can" may only be followed by one of
# these words).
ADVERB = {"ever", "even", "only", "really"}
PRESENT_NON_AUX = {
    "be", "get", "go", "have", "know", "mean", "need", "recall",
    "think", "understand", "want",
}
# Maps the left-hand part of a stroke to the word(s) it contributes.
#
# A value is either
#   * a plain string, which may be followed by any suffix, or
#   * a (words, allowed_suffixes) tuple, in which case do_lookup rejects the
#     stroke when its suffix word is not in the allowed set.
#
# NOTE(review): this table used to list "SWHA" twice -- first as "is what",
# then as "so what".  In a dict display the last duplicate wins, so
# "is what" was never reachable.  The dead entry has been dropped to keep
# the runtime mapping unchanged; if "is what" is wanted it needs a distinct
# stroke.
PREFIXES = {
    "SKP": "and",
    "SKPO": "and I don't",
    "SKPAO": "and you don't",
    "STKP": "and if",
    "STKPO": "and if I don't",
    "STKPAO": "and if you don't",
    "K": ("can", ADVERB | PRESENT_NON_AUX),
    "KO": ("could", ADVERB | PRESENT_NON_AUX),
    "TK": ("did", ADVERB | PRESENT_NON_AUX),
    "TKO": ("do", ADVERB | PRESENT_NON_AUX),
    "SR": ("have", ADVERB | PRESENT_NON_AUX | {"got", "had"}),
    "KWRO": ("I don't", ADVERB | PRESENT_NON_AUX),
    "KWRAO": ("you don't", ADVERB | PRESENT_NON_AUX),
    "STP": "if",
    "STPO": "if I don't",
    "STPAO": "if you don't",
    "STHA": "is that",
    "STHO": "is that I don't",
    "STHAO": "is that you don't",
    "SHO": ("should", ADVERB | PRESENT_NON_AUX),
    "SW": "so",
    "SWO": "so I don't",
    "SWAO": "so you don't",
    "STPW": "so if",
    "STPWO": "so if I don't",
    "STPWAO": "so if you don't",
    "STWHA": "so that",
    "STWHO": "so that I don't",
    "STWHAO": "so that you don't",
    "SWHA": "so what",
    "THA": "that",
    "THAO": "that you don't",
    "THO": "that I don't",
    "WHA": "what",
    "WHO": "what I don't",
    "WHAO": "what you don't",
    "WO": ("would", ADVERB | PRESENT_NON_AUX),
}
def lookup(key):
    """Return the multiword translation for ``key``.

    ``key`` is a sequence of strokes; only the first stroke is consulted
    (see LONGEST_KEY).
    NOTE(review): presumably this is the entry point called by the host
    steno engine's Python-dictionary loader -- confirm.

    Raises:
        KeyError: if ``key`` is empty or the stroke has no translation.
        RuntimeError: if ``do_lookup`` itself fails.  Internal errors are
            re-raised as RuntimeError so that a KeyError caused by a
            programming mistake is not mistaken for "no such entry".
    """
    if not key:
        # An empty key used to escape as IndexError from key[0]; callers
        # expect KeyError for "not found".
        raise KeyError(key)
    stroke = key[0]
    try:
        translation = do_lookup(stroke)
    except Exception as e:
        # Don't mask KeyErrors that arise because of my programming mistakes.
        raise RuntimeError("couldn't translate multiword brief") from e
    if not translation:
        raise KeyError(key)
    return translation
def do_lookup(stroke):
    """Translate a single stroke into a multiword phrase, or return None.

    The stroke is split by PARTS_RE into a left-hand prefix, an optional
    ``*``/``-`` middle marker, a subject and a right-hand suffix.  Each part
    is looked up in PREFIXES, SUBJECTS and SUFFIXES respectively and the
    resulting words are joined with single spaces.  A suffix starting with
    an apostrophe is attached directly to the preceding word.

    None is returned when:
      * the stroke is in BLACKLIST or does not match PARTS_RE;
      * a part is present in the stroke but has no table entry;
      * the suffix is restricted to certain subjects and this one is not
        among them;
      * the prefix is restricted to certain suffixes and this one is not
        among them;
      * fewer than two of the three parts contribute a word;
      * a prefix of three or more words is combined with a subject.
    """
    if stroke in BLACKLIST:
        return None

    match = PARTS_RE.match(stroke)
    if not match:
        return None

    parts = match.groupdict()
    lhs = parts["lhs"]
    middle = parts["middle"]
    subject = parts["subject"]
    rhs = parts["rhs"]

    # SUBJECTS keys must never contain the middle markers, otherwise the
    # marker could not be unambiguously attributed to the suffix below.
    assert not any("*" in i or "-" in i for i in SUBJECTS)

    # SUFFIXES keys always carry a leading marker; default to "-" when the
    # stroke has none.  Note that rhs is therefore never empty from here on,
    # so a stroke without a known suffix is always rejected further down.
    rhs = (middle or "-") + rhs

    lhs_value = PREFIXES.get(lhs)
    subject_value = SUBJECTS.get(subject)
    rhs_value = SUFFIXES.get(rhs)

    # (word, allowed_subjects): the suffix only combines with some subjects.
    if isinstance(rhs_value, tuple):
        rhs_value, allowed_subjects = rhs_value
        if subject_value not in allowed_subjects:
            return None

    # (words, allowed_suffixes): the prefix only combines with some suffixes.
    if isinstance(lhs_value, tuple):
        lhs_value, allowed_suffixes = lhs_value
        if rhs_value and rhs_value not in allowed_suffixes:
            return None

    # Keys were pressed for a part, but that part has no translation.
    if lhs and not lhs_value:
        return None
    if subject and not subject_value:
        return None
    if rhs and not rhs_value:
        return None

    num_values = sum(
        int(i is not None)
        for i in [lhs_value, subject_value, rhs_value]
    )
    if num_values < 2:
        return None

    # Bit of a hack to support multiple-word left-hand sides.
    if lhs_value and len(lhs_value.split()) >= 3 and subject:
        return None

    words = []
    if lhs_value:
        words.append(lhs_value)
    if subject_value:
        words.append(subject_value)
    if rhs_value:
        if rhs_value.startswith("'"):
            # Contraction: glue onto the previous word ("I" + "'ve").
            assert words
            words[-1] += rhs_value
        else:
            words.append(rhs_value)

    assert words
    return " ".join(words)
def test_do_lookup():
    """Check do_lookup against a table of (stroke, expected translation).

    An expected value of None means the stroke must not be translated.
    """
    tests = [
        ("THARBD", "that should"),
        ("THAEURBD", "that I should"),
        ("EULD", "I would"),
        ("THA*EUF", "that I've"),
        ("THRAEUGS", None),
        ("SR*U", None),
        ("SRUB", "have you be"),
        ("SR*UFR", "have you ever"),
        ("EUBG", "I can"),
        ("UR", "you are"),
        ("TKUPBT", None),
        ("SRURBD", None),
        ("STKPOPT", "and if I don't want"),
        ("STWHAOF", "so that you don't have"),
        ("WHAOEUL", None),
        ("SWOPBLG", "so I don't know"),
        ("STWHAOFTS", "so that you don't have to"),
    ]
    # `stroke` rather than `input`, so the builtin is not shadowed.
    for stroke, expected in tests:
        output = do_lookup(stroke)
        assert output == expected, (
            f"do_lookup({stroke!r}) returned {output!r}, expected {expected!r}"
        )
def reverse_lookup(text):
    """Return the strokes that translate to ``text``.

    Reverse lookup is not implemented for these generated briefs, so the
    result is always an empty list regardless of ``text``.
    """
    return []
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment