Instantly share code, notes, and snippets.

Embed
What would you like to do?
multiword_briefs.py
import re
# Number of strokes per key this module handles; lookup() below only ever
# reads key[0]. Not referenced elsewhere in this file -- presumably read by
# the host application that loads this dictionary module (TODO confirm).
LONGEST_KEY = 1
# Splits a whole stroke string (anchored at both ends) into four named groups:
#   lhs     - any of S T K P W H R A O, each optional, in that order
#   middle  - "*", "-", or empty
#   subject - any of E U, each optional, in that order
#   rhs     - any of F R P B L G T S D Z, each optional, in that order
# Compiled with re.VERBOSE, so the spaces and line breaks inside the pattern
# are ignored by the regex engine.
PARTS_RE = re.compile(r"""
^
(?P<lhs> S? T? K? P? W? H? R?
A? O?)
(?P<middle> (\* | - | ))
(?P<subject> (E? U?))
(?P<rhs> F? R? P? B? L? G? T? S? D? Z?)
$
""", re.VERBOSE)
# Strokes that do_lookup() refuses to translate (it returns None for them)
# even though they would otherwise parse. Each trailing comment names the
# translation that is preferred over the phrase this module would produce.
BLACKLIST = {
"EUPL", # prefer "{im^}" over "I'm"
"*EUF", # prefer "{^ive}" over "I've"
"THAPBT", # prefer "than the" over "that don't"
"TKOPBLG", # prefer "dodge" over "do know"
"KEUPBG", # prefer "king" over "can I think"
"KWROUPBG", # prefer "young" over "I don't you think"
"KWRAO*URL", # prefer "URL" over "you don't you really"
"EUFT", # prefer "{^ist}" over "I have the"
"TKUB", # prefer "dub" over "do you be"
}
# Import-time sanity check: every blacklisted stroke must match PARTS_RE,
# otherwise the entry could never be reached by do_lookup() (likely a typo).
assert all(PARTS_RE.match(i) for i in BLACKLIST)
# Maps the "subject" group of PARTS_RE to the subject word it stands for.
# The empty string maps to None, meaning the stroke carries no subject.
# A subject group with no entry here (e.g. "E") makes do_lookup() return None.
SUBJECTS = {
"": None,
"U": "you",
"EU": "I",
}
# Maps the right-hand part of a stroke to the word(s) that end the phrase.
#
# Keys are the "middle" marker followed by the "rhs" letters; do_lookup()
# substitutes "-" when the stroke has no explicit marker, so every key starts
# with either "*" or "-".
#
# Values are either
#   * a plain string, usable with any subject, or
#   * a tuple (text, allowed_subjects), where allowed_subjects lists the
#     SUBJECTS values (None = no subject) the suffix may be combined with.
#
# Text starting with an apostrophe is glued onto the preceding word by
# do_lookup() instead of being added as a separate word.
SUFFIXES = {
"*D": "'d",
"*L": "'ll",
"*PL": ("'m", ["I"]),
"*R": ("'re", [None, "you"]),
"*F": "'ve",
"-R": ("are", [None, "you"]),
"-B": "be",
"-BG": "can",
"-BGT": "can't",
"-BGD": "could",
"*BGT": "couldn't",
"*PBT": "didn't",
"-PBT": "don't",
"*FPB": "even",
"*FR": "ever",
"-RBGT": "get",
"-RBGTS": "get to",
"*RBGT": "got",
"*RBGTS": "got to",
"-D": "had",
"-F": "have",
"-FT": "have the",
"-FTS": "have to",
"-FPBT": "haven't",
"-PBLG": "know",
"*PBLG": "knew",
"*PBL": "mean",
"*PBLTS": "mean to",
"-PLT": "might",
"*PBD": "need",
"-PBL": "only",
"*RL": "really",
"-RBD": "should",
"*RBT": "shouldn't",
"-PBG": "think",
"-RPBD": "understand",
"*RPBD": "understood",
"-PT": "want",
"-PTS": "want to",
"-FS": ("was", [None, "I"]),
"*FBT": ("wasn't", [None, "I"]),
"-RP": "were",
"-RPTS": "were to",
"*RPT": "weren't",
"-L": "will",
"-FBT": "won't",
"-LD": "would",
"*LT": "wouldn't",
}
# Suffix words (values from SUFFIXES) that the restricted prefixes below are
# allowed to be followed by.
ADVERB = {"ever", "even", "only", "really"}
PRESENT_NON_AUX = {
    "be", "get", "go", "have", "know", "mean", "need", "recall",
    "think", "understand", "want",
}

# Maps the "lhs" group of PARTS_RE to the words that open the phrase.
#
# Values are either
#   * a plain string, which may be followed by any suffix, or
#   * a tuple (text, allowed_suffixes), where allowed_suffixes is the set of
#     suffix words the prefix may be combined with; do_lookup() rejects the
#     stroke when the suffix word is not in that set.
#
# do_lookup() also refuses to combine a prefix of three or more words with a
# subject, since those prefixes already contain one ("and I don't", ...).
PREFIXES = {
    "SKP": "and",
    "SKPO": "and I don't",
    "SKPAO": "and you don't",
    "STKP": "and if",
    "STKPO": "and if I don't",
    "STKPAO": "and if you don't",
    "K": ("can", ADVERB | PRESENT_NON_AUX),
    "KO": ("could", ADVERB | PRESENT_NON_AUX),
    "TK": ("did", ADVERB | PRESENT_NON_AUX),
    "TKO": ("do", ADVERB | PRESENT_NON_AUX),
    "SR": ("have", ADVERB | PRESENT_NON_AUX | {"got", "had"}),
    "KWRO": ("I don't", ADVERB | PRESENT_NON_AUX),
    "KWRAO": ("you don't", ADVERB | PRESENT_NON_AUX),
    "STP": "if",
    "STPO": "if I don't",
    "STPAO": "if you don't",
    "STHA": "is that",
    # "SWHA" is deliberately listed only once (as "so what", further down).
    # It was also spelled out here as "is what", but a dict literal keeps
    # only the last value for a repeated key, so that entry never took
    # effect. The conflict is recorded here instead of being silently lost.
    "STHO": "is that I don't",
    "STHAO": "is that you don't",
    "SHO": ("should", ADVERB | PRESENT_NON_AUX),
    "SW": "so",
    "SWO": "so I don't",
    "SWAO": "so you don't",
    "STPW": "so if",
    "STPWO": "so if I don't",
    "STPWAO": "so if you don't",
    "STWHA": "so that",
    "STWHO": "so that I don't",
    "STWHAO": "so that you don't",
    "SWHA": "so what",
    "THA": "that",
    "THAO": "that you don't",
    "THO": "that I don't",
    "WHA": "what",
    "WHO": "what I don't",
    "WHAO": "what you don't",
    "WO": ("would", ADVERB | PRESENT_NON_AUX),
}
def lookup(key):
    """Return the phrase for a one-stroke key.

    Args:
        key: Indexable sequence of stroke strings. Only ``key[0]`` is used.

    Returns:
        The translated phrase as a string.

    Raises:
        KeyError: If the stroke has no translation in this module.
        RuntimeError: If ``do_lookup`` itself raised. Wrapping the failure
            keeps an accidental ``KeyError`` from a programming mistake from
            being mistaken for "no translation".
    """
    first_stroke = key[0]
    try:
        phrase = do_lookup(first_stroke)
    except Exception as exc:
        # Re-raise as a different type so internal bugs are not confused
        # with the ordinary "not found" KeyError below.
        raise RuntimeError("couldn't translate multiword brief") from exc
    if phrase:
        return phrase
    raise KeyError()
def do_lookup(stroke):
    """Build the phrase for a single stroke string, or return None.

    The stroke is split by ``PARTS_RE`` into a prefix, an optional ``*``/``-``
    marker, a subject and a suffix, which are looked up in ``PREFIXES``,
    ``SUBJECTS`` and ``SUFFIXES`` respectively and joined with spaces.

    Args:
        stroke: The stroke as a string, e.g. ``"THAEURBD"``.

    Returns:
        The phrase (e.g. ``"that I should"``), or ``None`` when the stroke is
        blacklisted, does not parse, contains an unknown part, or violates
        one of the combination rules in the tables.
    """
    if stroke in BLACKLIST:
        return None

    match = PARTS_RE.match(stroke)
    if match is None:
        return None

    prefix_keys = match.group("lhs")
    marker = match.group("middle")
    subject_keys = match.group("subject")

    # Subject keys never contain a marker, so the marker can be folded into
    # the suffix key without ambiguity.
    assert all("*" not in s and "-" not in s for s in SUBJECTS)

    # SUFFIXES keys always start with "*" or "-"; a missing marker means "-".
    suffix_keys = (marker or "-") + match.group("rhs")

    prefix = PREFIXES.get(prefix_keys)
    subject = SUBJECTS.get(subject_keys)
    suffix = SUFFIXES.get(suffix_keys)

    # Some suffixes are only valid with particular subjects.
    if isinstance(suffix, tuple):
        suffix, permitted_subjects = suffix
        if subject not in permitted_subjects:
            return None

    # Some prefixes are only valid with particular suffix words.
    if isinstance(prefix, tuple):
        prefix, permitted_suffixes = prefix
        if suffix and suffix not in permitted_suffixes:
            return None

    # Reject strokes containing a part that has no table entry.
    if prefix_keys and not prefix:
        return None
    if subject_keys and not subject:
        return None
    # NOTE(review): suffix_keys is never empty (it always carries a marker),
    # so a stroke without a recognised suffix is always rejected here --
    # confirm that prefix+subject-only strokes are meant to be unsupported.
    if not suffix:
        return None

    # A phrase needs at least two of the three parts.
    present = sum(1 for part in (prefix, subject, suffix) if part is not None)
    if present < 2:
        return None

    # Bit of a hack to support multiple-word left-hand sides: prefixes of
    # three or more words are not combined with a subject.
    if prefix and subject_keys and len(prefix.split()) >= 3:
        return None

    words = [part for part in (prefix, subject) if part]
    if suffix.startswith("'"):
        # Contractions attach to the preceding word rather than standing alone.
        assert words
        words[-1] += suffix
    else:
        words.append(suffix)

    assert words
    return " ".join(words)
def test_do_lookup():
    """Check do_lookup against a table of strokes and expected results.

    ``None`` as the expected value means the stroke must not be translated.
    Raises AssertionError on the first mismatch.
    """
    expectations = {
        "THARBD": "that should",
        "THAEURBD": "that I should",
        "EULD": "I would",
        "THA*EUF": "that I've",
        "THRAEUGS": None,
        "SR*U": None,
        "SRUB": "have you be",
        "SR*UFR": "have you ever",
        "EUBG": "I can",
        "UR": "you are",
        "TKUPBT": None,
        "SRURBD": None,
        "STKPOPT": "and if I don't want",
        "STWHAOF": "so that you don't have",
        "WHAOEUL": None,
        "SWOPBLG": "so I don't know",
        "STWHAOFTS": "so that you don't have to",
    }
    for stroke, expected in expectations.items():
        actual = do_lookup(stroke)
        assert actual == expected
def reverse_lookup(text):
    """Return the strokes that produce ``text``.

    Reverse lookup is not implemented: the result is always a new empty list,
    regardless of ``text``.
    """
    no_matches = []
    return no_matches
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment