Skip to content

Instantly share code, notes, and snippets.

@kylebgorman
Created November 8, 2019 16:44
Show Gist options
  • Save kylebgorman/124909662f1abdab9a97ef06237c557d to your computer and use it in GitHub Desktop.
Save kylebgorman/124909662f1abdab9a97ef06237c557d to your computer and use it in GitHub Desktop.
Spanish pronunciation rules in Pynini
from typing import Optional
from pynini import *
def _rule(tau: Fst, sigma: Fst) -> Fst:
    """Compiles an unconditioned (context-free) rewrite rule.

    Args:
      tau: the transducer describing the rewrite.
      sigma: closure over the alphabet the rule is compiled against.

    Returns:
      The optimized rewrite-rule FST.
    """
    # Empty left and right contexts: the rewrite applies everywhere.
    rule = cdrewrite(tau, "", "", sigma)
    return rule.optimize()
def _right_rule(tau: Fst, right: Fst, sigma: Fst) -> Fst:
    """Compiles a rewrite rule conditioned on its right context only.

    Args:
      tau: the transducer describing the rewrite.
      right: acceptor for the context that must follow the rewritten span.
      sigma: closure over the alphabet the rule is compiled against.

    Returns:
      The optimized rewrite-rule FST.
    """
    # The left context is left empty, i.e., unrestricted.
    rule = cdrewrite(tau, "", right, sigma)
    return rule.optimize()
# Alphabet.
# Vowel phones; the front vowels are also used as the right context that
# conditions the rewrites of "g", "gu" and "c" further down.
front_v = union("i", "e")
back_v = union("a", "o", "u")
ph_v = front_v | back_v
# Vowel graphemes that carry a diacritic. "ü" has to be part of the alphabet:
# the rules are compiled against `sigma`, so without it an input containing
# "ü" lies outside the alphabet and the "gü" rewrite can never be reached.
gr_v = union("á", "é", "í", "ó", "ú", "ü", "ý")
# Consonant phones; "R" is the temporary placeholder used by the r-rules.
ph_c = union("b", "tʃ", "d", "f", "g", "j", "k", "l", "ʝ", "m",
             "n", "ɲ", "p", "r", "ɾ", "R", "s", "t", "w", "x", "z")
# Consonant graphemes that are not also used as phone symbols.
gr_c = union("c", "h", "ñ", "q", "v", "y")
# Closure over the alphabet---needed to compile rewrite rules.
sigma = closure(ph_v | gr_v | ph_c | gr_c).optimize()
## START OF PART 4.
# Unconditioned rewrites, compiled as a single rule from one string map, so
# they all apply regardless of the surrounding context.
u_rules = _rule(
    string_map(
        [
            # Digraphs: two graphemes spelling a single phone.
            ("ch", "tʃ"),
            ("ll", "ʝ"),
            ("qu", "k"),
            # Strips the acute accent from vowels.
            ("á", "a"),
            ("é", "e"),
            ("í", "i"),
            ("ó", "o"),
            ("ú", "u"),
            # Remaining one-to-one grapheme replacements.
            ("v", "b"),
            ("ñ", "ɲ"),
        ]
    ),
    sigma,
)
# Rhotics. The trill is parked on the placeholder "R" while every remaining
# "r" is turned into the tap "ɾ"; the placeholder is then spelled "r".
#
# Besides the digraph "rr", a single "r" is also mapped to the trill at the
# beginning of the string and after "n", "l" or "s" (e.g., "rosa", "honra");
# previously every single "r" became a tap.
# NOTE(review): the original TODO(kbg) only said "This is not quite right";
# confirm that the missing trill contexts were what it referred to.
r_rules = (
    _rule(transducer("rr", "R"), sigma)
    @ cdrewrite(
        transducer("r", "R"), union("[BOS]", "n", "l", "s"), "", sigma
    ).optimize()
    @ _rule(transducer("r", "ɾ"), sigma)
    @ _rule(transducer("R", "r"), sigma)
).optimize()
# Rules for "x", "j", "y" and "h". The order matters: "x" is expanded before
# "j" is rewritten to "x", and "j" is rewritten before "y"/"hi" introduce the
# glide "j", so a rule's output is never consumed by a later one.
b_rules = (
    _rule(transducer("x", "ks"), sigma)
    @ _rule(transducer("j", "x"), sigma)
    @ _rule(transducer(union("ý", "y"), "j"), sigma)
    # "hi"/"hu" only spell a glide before another vowel ("hierba", "huevo").
    # Applied unconditionally they also destroyed the vowel of words such as
    # "hijo" or "humo"; there the "h" is simply deleted by the final rule.
    @ _right_rule(transducer("hi", "j"), ph_v, sigma)
    @ _right_rule(transducer("hu", "w"), ph_v, sigma)
    # Any remaining "h" is silent.
    @ _rule(transducer("h", ""), sigma)
).optimize()
# Rules for "g" and "c". The context-sensitive rewrites come first and the
# unconditioned "c" -> "k" fallback comes last, so it only sees the "c"s that
# no earlier rule has consumed.
k_rules = (
    # "g" directly before a front vowel.
    _right_rule(transducer("g", "x"), front_v, sigma)
    # "gu" before a front vowel: the "u" is dropped.
    @ _right_rule(transducer("gu", "g"), front_v, sigma)
    @ _rule(transducer("gü", "gw"), sigma)
    # "c" directly before a front vowel.
    @ _right_rule(transducer("c", "s"), front_v, sigma)
    # "cu" before any vowel.
    @ _right_rule(transducer("cu", "kw"), ph_v, sigma)
    @ _rule(transducer("c", "k"), sigma)
    # Optimized like the other cascades (r_rules, b_rules) for consistency.
).optimize()
# Full grapheme-to-phoneme cascade: the rule sets are composed in the order
# in which they apply to the input.
g2p = (u_rules @ r_rules @ b_rules @ k_rules).optimize()
def _rewrite(s: str, rule: Fst) -> Optional[str]:
    """Applies a rule to a string and returns the shortest-path output.

    Args:
      s: the input string.
      rule: the rule FST the input is composed with.

    Returns:
      The output string of the shortest path through the composition, or
      None if the composed lattice has no start state.
    """
    lattice = compose(s, rule)
    if lattice.start() != NO_STATE_ID:
        return shortestpath(lattice).stringify()
    return None
# Smoke tests for the full cascade: (spelling, expected transcription).
for _spelling, _expected in (
    ("cuando", "kwando"),  # "cu" before a vowel.
    ("hierba", "jeɾba"),   # "hi" as a glide; single "r".
    ("hacer", "aseɾ"),     # Silent "h"; "c" before a front vowel.
    ("llave", "ʝabe"),     # Digraph "ll"; "v".
    ("pero", "peɾo"),      # Single "r".
):
    assert _rewrite(_spelling, g2p) == _expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment