# arxanas/multiword_briefs.py Secret

## multiword_briefs.py
import re

# Maximum number of strokes in a key; lookup() only ever reads key[0].
LONGEST_KEY = 1

# Splits one stroke into four named groups, which must appear in this order:
#   lhs     - any of S T K P W H R A O (each optional, in that order)
#   middle  - "*", "-", or nothing
#   subject - any of E U (each optional, in that order)
#   rhs     - any of F R P B L G T S D Z (each optional, in that order)
# The pattern is anchored at both ends, so a stroke containing any other
# character, or keys out of order, does not match at all.
PARTS_RE = re.compile(r"""
    ^
    (?P<lhs> S? T? K? P? W? H? R?
             A? O?)
    (?P<middle> (\* | - | ))
    (?P<subject> (E? U?))
    (?P<rhs> F? R? P? B? L? G? T? S? D? Z?)
    $
""", re.VERBOSE)

# Strokes that do_lookup() refuses to translate even though they parse.
# Each comment names the translation that is preferred for that stroke and
# the phrase this module would otherwise have produced.
BLACKLIST = {
    # NOTE(review): "EUPL" parses to the suffix "-PL", which is not a key of
    # SUFFIXES ("'m" is "*PL"), so this stroke would not translate even
    # without this entry -- confirm whether "*EUPL" was intended.
    "EUPL", # prefer "{im^}" over "I'm"
    "*EUF", # prefer "{^ive}" over "I've"
    "THAPBT", # prefer "than the" over "that don't"
    "TKOPBLG", # prefer "dodge" over "do know"
    "KEUPBG", # prefer "king" over "can I think"
    "KWROUPBG", # prefer "young" over "I don't you think"
    "KWRAO*URL", # prefer "URL" over "you don't you really"
    "EUFT", # prefer "{^ist}" over "I have the"
    "TKUB", # prefer "dub" over "did you be"
}
# Import-time sanity check: every blacklisted stroke must at least be
# parseable by PARTS_RE, otherwise the entry could never have any effect.
assert all(PARTS_RE.match(i) for i in BLACKLIST)

# Subject chord (the "subject" group of PARTS_RE) -> pronoun.
# The empty chord maps to None, meaning "no subject in this stroke".
SUBJECTS = {
    "": None,
    "U": "you",
    "EU": "I",
}

# Right-hand chord -> word(s). Keys are the "rhs" group of PARTS_RE prefixed
# with "*" when the stroke has a star and with "-" otherwise (do_lookup()
# builds the key that way).
#
# A value is either a plain string, or a (word, allowed_subjects) tuple; in
# the tuple form the word is only produced when the stroke's subject (a value
# of SUBJECTS, with None meaning no subject) is in allowed_subjects.
#
# Values starting with an apostrophe are contractions: do_lookup() appends
# them to the preceding word instead of adding a separate word.
SUFFIXES = {
    "*D": "'d",
    "*L": "'ll",
    "*PL": ("'m", ["I"]),
    "*R": ("'re", [None, "you"]),
    "*F": "'ve",
    "-R": ("are", [None, "you"]),
    "-B": "be",
    "-BG": "can",
    "-BGT": "can't",
    "-BGD": "could",
    "*BGT": "couldn't",
    "*PBT": "didn't",
    "-PBT": "don't",
    "*FPB": "even",
    "*FR": "ever",
    "-RBGT": "get",
    "-RBGTS": "get to",
    "*RBGT": "got",
    "*RBGTS": "got to",
    "-D": "had",
    "-F": "have",
    "-FT": "have the",
    "-FTS": "have to",
    "-FPBT": "haven't",
    "-PBLG": "know",
    "*PBLG": "knew",
    "*PBL": "mean",
    "*PBLTS": "mean to",
    "-PLT": "might",
    "*PBD": "need",
    "-PBL": "only",
    "*RL": "really",
    "-RBD": "should",
    "*RBT": "shouldn't",
    "-PBG": "think",
    "-RPBD": "understand",
    "*RPBD": "understood",
    "-PT": "want",
    "-PTS": "want to",
    "-FS": ("was", [None, "I"]),
    "*FBT": ("wasn't", [None, "I"]),
    "-RP": "were",
    "-RPTS": "were to",
    "*RPT": "weren't",
    "-L": "will",
    "-FBT": "won't",
    "-LD": "would",
    "*LT": "wouldn't",
}

# Suffix words that may follow one of the restricted (auxiliary-like)
# prefixes below. Membership is tested against SUFFIXES *values* (the
# translated word), not against chords.
ADVERB = {"ever", "even", "only", "really"}
PRESENT_NON_AUX = {"be", "get", "go", "have", "know", "mean", "need", "recall",
        "think", "understand", "want"}

# Left-hand chord (the "lhs" group of the stroke) -> leading word(s).
#
# A value is either a plain string, or a (word, allowed_suffixes) tuple; in
# the tuple form the stroke is rejected when it has a suffix whose
# translation is not in allowed_suffixes.
#
# Every key must be unique: a repeated key in a dict literal is not an error,
# the later entry silently replaces the earlier one.
PREFIXES = {
    "SKP": "and",
    "SKPO": "and I don't",
    "SKPAO": "and you don't",
    "STKP": "and if",
    "STKPO": "and if I don't",
    "STKPAO": "and if you don't",
    "K": ("can", ADVERB | PRESENT_NON_AUX),
    "KO": ("could", ADVERB | PRESENT_NON_AUX),
    "TK": ("did", ADVERB | PRESENT_NON_AUX),
    "TKO": ("do", ADVERB | PRESENT_NON_AUX),
    "SR": ("have", ADVERB | PRESENT_NON_AUX | {"got", "had"}),
    "KWRO": ("I don't", ADVERB | PRESENT_NON_AUX),
    "KWRAO": ("you don't", ADVERB | PRESENT_NON_AUX),
    "STP": "if",
    "STPO": "if I don't",
    "STPAO": "if you don't",
    "STHA": "is that",
    # NOTE: "is what" would also be the chord "SWHA", which collides with
    # "so what" below. Only one phrase can own the chord; "so what" keeps it
    # (it is the entry that was effective when both were listed).
    "STHO": "is that I don't",
    "STHAO": "is that you don't",
    "SHO": ("should", ADVERB | PRESENT_NON_AUX),
    "SW": "so",
    "SWO": "so I don't",
    "SWAO": "so you don't",
    "STPW": "so if",
    "STPWO": "so if I don't",
    "STPWAO": "so if you don't",
    "STWHA": "so that",
    "STWHO": "so that I don't",
    "STWHAO": "so that you don't",
    "SWHA": "so what",
    "THA": "that",
    "THAO": "that you don't",
    "THO": "that I don't",
    "WHA": "what",
    "WHO": "what I don't",
    "WHAO": "what you don't",
    "WO": ("would", ADVERB | PRESENT_NON_AUX),
}

def lookup(key):
    """Translate the first stroke of *key* into a multiword phrase.

    Only ``key[0]`` is consulted. Any exception raised while translating is
    re-raised as ``RuntimeError`` so that an accidental ``KeyError`` from a
    bug in ``do_lookup`` cannot be mistaken for "no translation".

    Raises:
        KeyError: ``do_lookup`` produced no translation for the stroke.
        RuntimeError: ``do_lookup`` itself raised.
    """
    first_stroke = key[0]
    try:
        phrase = do_lookup(first_stroke)
    except Exception as error:
        # A genuine "not found" is signalled below, never from inside
        # do_lookup, so anything caught here is a programming mistake.
        raise RuntimeError("couldn't translate multiword brief") from error
    if phrase:
        return phrase
    raise KeyError()


def do_lookup(stroke):
    """Return the phrase for a single *stroke*, or ``None`` if there is none.

    The stroke is split by ``PARTS_RE`` into a left-hand chord, an optional
    ``*``/``-`` marker, a subject chord and a right-hand chord. Each part is
    translated through ``PREFIXES``, ``SUBJECTS`` and ``SUFFIXES``, and the
    resulting words are joined with spaces. Suffixes that begin with an
    apostrophe are attached to the preceding word instead.

    ``None`` is returned when the stroke is blacklisted, does not parse, has
    a part with no translation, violates a subject/suffix restriction, or
    yields fewer than two translated parts.
    """
    if stroke in BLACKLIST:
        return None

    match = PARTS_RE.match(stroke)
    if match is None:
        return None
    lhs, middle, subject, rhs = match.group("lhs", "middle", "subject", "rhs")

    # The marker is folded into the suffix key below; that is only sound as
    # long as no subject chord contains a marker character itself.
    assert all("*" not in chord and "-" not in chord for chord in SUBJECTS)
    # SUFFIXES keys always carry a marker; an unmarked stroke counts as "-".
    rhs = (middle or "-") + rhs

    prefix = PREFIXES.get(lhs)
    pronoun = SUBJECTS.get(subject)
    suffix = SUFFIXES.get(rhs)

    # Tuple entries carry a restriction alongside the word itself.
    if isinstance(suffix, tuple):
        suffix, permitted_subjects = suffix
        if pronoun not in permitted_subjects:
            return None
    if isinstance(prefix, tuple):
        prefix, permitted_suffixes = prefix
        if suffix and suffix not in permitted_suffixes:
            return None

    # Every part that is present in the stroke must have translated. The
    # suffix key is never empty at this point (it always has its marker), so
    # a stroke without a known suffix is rejected here.
    for chord, value in ((lhs, prefix), (subject, pronoun), (rhs, suffix)):
        if chord and not value:
            return None

    # At least two of the three parts must have produced a word.
    if [prefix, pronoun, suffix].count(None) > 1:
        return None

    # Bit of a hack to support multiple-word left-hand sides: prefixes of
    # three or more words are not combined with a subject chord.
    if prefix and subject and len(prefix.split()) >= 3:
        return None

    words = [word for word in (prefix, pronoun) if word]
    if suffix:
        if suffix.startswith("'"):
            # Contraction: glue onto the previous word ("I" + "'ve").
            assert words
            words[-1] += suffix
        else:
            words.append(suffix)

    assert words
    return " ".join(words)


def test_do_lookup():
    """Check do_lookup() against a table of strokes.

    Each case is ``(stroke, expected)``; an expected value of ``None`` means
    the stroke must not translate.
    """
    cases = [
        ("THARBD", "that should"),
        ("THAEURBD", "that I should"),
        ("EULD", "I would"),
        ("THA*EUF", "that I've"),
        ("THRAEUGS", None),
        ("SR*U", None),
        ("SRUB", "have you be"),
        ("SR*UFR", "have you ever"),
        ("EUBG", "I can"),
        ("UR", "you are"),
        ("TKUPBT", None),
        ("SRURBD", None),
        ("STKPOPT", "and if I don't want"),
        ("STWHAOF", "so that you don't have"),
        ("WHAOEUL", None),
        ("SWOPBLG", "so I don't know"),
        ("STWHAOFTS", "so that you don't have to"),
    ]
    # `stroke` rather than `input`, so the builtin is not shadowed.
    for stroke, expected in cases:
        actual = do_lookup(stroke)
        # Name the failing stroke so a broken case can be identified even
        # when the runner does not rewrite assertions.
        assert actual == expected, "%r: expected %r, got %r" % (
            stroke, expected, actual)


def reverse_lookup(text):
    """Return the strokes that translate to *text*.

    This dictionary does not provide reverse lookups: *text* is ignored and
    a new empty list is returned on every call.
    """
    return []