Skip to content

Instantly share code, notes, and snippets.

@7shi
Created May 15, 2020 15:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 7shi/92794319bebef470dd71e68d4e3e599d to your computer and use it in GitHub Desktop.
Save 7shi/92794319bebef470dd71e68d4e3e599d to your computer and use it in GitHub Desktop.
[py] SSML converter for Esperanto
# CC0 http://creativecommons.org/publicdomain/zero/1.0/
# Lookup table filled by setphonemes(): Esperanto letter -> phoneme code.
phonemes = {}


def setphonemes(phs):
    """Register letter/phoneme pairs in the module-level ``phonemes`` table.

    *phs* is a whitespace-separated list of ``letter,CODE`` items; each item
    must contain exactly one comma, otherwise ``ValueError`` is raised.
    """
    phonemes.update(item.split(",") for item in phs.split())


setphonemes("a,A b,B c,TS ĉ,CH d,D e,E f,F g,G ĝ,JH h,H")
setphonemes("ĥ,X i,I j,J ĵ,ZH k,K l,L m,M n,N o,O p,P")
setphonemes("r,R s,S ŝ,SH t,T u,U ŭ,W v,V z,Z")
def getph(ch):
    """Return the phoneme code registered for *ch*, or "" if there is none."""
    return phonemes.get(ch, "")
def getphoneme(word):
    """Return the list of phoneme codes for *word*, one per known letter.

    Characters without a phoneme code (``getph`` returns "") are skipped.
    """
    return list(filter(None, map(getph, word)))
# Disabled demo: change the condition to True to print sample phoneme lists.
if False:
    print("# getphoneme")
    for sample in ("feliĉa", "ĝis"):
        print(sample, "->", getphoneme(sample))
class Parser:
    """Reader with one item of lookahead over any iterable.

    ``peek`` shows the next item without consuming it, ``read`` consumes and
    returns it, and ``accept`` consumes an item that was already peeked.
    All of them signal the end of the input with ``None``, so the source
    should not itself contain ``None`` items.
    """

    def __init__(self, src):
        """Wrap the iterable *src*."""
        self.i = iter(src)
        self.cur = None
        # Explicit flag rather than testing the truthiness of ``cur``:
        # falsy items such as 0 or "" must stay buffered like any other
        # item instead of being silently skipped by the next peek().
        self._buffered = False

    def peek(self):
        """Return the next item without consuming it, or None at the end."""
        if not self._buffered:
            try:
                self.cur = next(self.i)
            except StopIteration:
                return None
            self._buffered = True
        return self.cur

    def read(self):
        """Consume and return the next item, or None at the end."""
        ret = self.peek()
        self.cur = None
        self._buffered = False
        return ret

    def accept(self):
        """Consume the item last returned by ``peek``; no-op if none is held."""
        if self._buffered:
            self.cur = None
            self._buffered = False
            # Pre-load the following item, as the original code did.
            self.peek()
def isconsonant(ph):
    """Return True if *ph* is a non-empty code other than a vowel code.

    The vowel codes are exactly "A", "E", "I", "O" and "U". ``None`` and ""
    (used by callers to mean "no phoneme") yield False. The comparison is an
    exact match, so a multi-letter code is never mistaken for a vowel just
    because it happens to be a substring of "AEIOU".
    """
    return bool(ph) and ph not in {"A", "E", "I", "O", "U"}
def syllablize(phs):
    """Split a list of phoneme codes into syllables (a list of lists).

    The phonemes are scanned from the end of the word. Consonants met before
    a vowel is reached are kept as the tail of that vowel's syllable. When a
    vowel is reached, the consonant directly before it (if any) is pulled in
    as well, and one more consonant is pulled in when either
      * the first one is "L" or "R" and the other one differs from it, or
      * the two together spell "KV", "GV" or "DZ".
    Consonants left over at the start of the word are prepended to the first
    syllable; if there was no vowel at all they form the only syllable.
    """
    scanner = Parser(reversed(phs))
    syllables = []
    pending = []
    while (ph := scanner.read()):
        pending.insert(0, ph)
        if isconsonant(ph):
            # Still collecting the consonants that follow the vowel.
            continue
        onset = scanner.peek()
        if isconsonant(onset):
            scanner.accept()
            pending.insert(0, onset)
            before = scanner.peek()
            if isconsonant(before):
                if onset in "LR" and before != onset:
                    take_both = True
                else:
                    take_both = before + onset in ("KV", "GV", "DZ")
                if take_both:
                    scanner.accept()
                    pending.insert(0, before)
        syllables.insert(0, pending)
        pending = []
    if pending:
        # Word-initial consonants that never met a vowel of their own.
        if syllables:
            syllables[0] = pending + syllables[0]
        else:
            syllables = [pending]
    return syllables
# Disabled demo: change the condition to True to print sample syllable splits.
if False:
    print("# syllablize")
    for sample in ("feliĉa", "ĝis", "akvo", "edzo", "lingvo", "patro", "strato"):
        print(sample, "->", syllablize(getphoneme(sample)))
def setaccent(syls):
    """Mark the second-to-last syllable of *syls* and return *syls*.

    With fewer than two syllables nothing is changed. Otherwise the
    second-to-last entry is replaced by a new list that starts with "s1"
    and, when its last phoneme is not a consonant, ends with "lng".
    The outer list is modified in place.
    NOTE(review): "s1" and "lng" are presumably the UPS stress and length
    marks -- confirm against the phone set documentation.
    """
    if len(syls) < 2:
        return syls
    marked = ["s1"] + syls[-2]
    if not isconsonant(marked[-1]):
        marked.append("lng")
    syls[-2] = marked
    return syls
# Disabled demo: change the condition to True to print accented syllables.
if False:
    print("# setaccent")
    for sample in ("feliĉa", "ĝis", "akvo", "edzo", "lingvo", "patro", "strato"):
        print(sample, "->", setaccent(syllablize(getphoneme(sample))))
def combine(syls):
    """Join syllables into one string.

    Phonemes inside a syllable are separated by a space, and syllables are
    separated by " . ".
    """
    return " . ".join(" ".join(syllable) for syllable in syls)
# x-system digraphs and the accented letters they stand for.
xsis = [pair.split(",") for pair in "cx,ĉ gx,ĝ hx,ĥ jx,ĵ sx,ŝ ux,ŭ".split()]


def getups(word):
    """Return the phoneme string for *word*.

    The word is lower-cased, x-system digraphs are replaced by their accented
    letters, and the result is converted to phonemes, split into syllables,
    accented and joined. Finally every "W" in the joined string is replaced
    by "U".
    """
    normalized = word.lower()
    for digraph, letter in xsis:
        normalized = normalized.replace(digraph, letter)
    syllables = setaccent(syllablize(getphoneme(normalized)))
    return combine(syllables).replace("W", "U")
# Disabled demo: change the condition to True to print sample phoneme strings.
if False:
    print("# getups")
    for sample in ("Felicxa", "Gxis", "akvo", "lingvo", "patro", "strato", "hodiaux"):
        print(sample, "->", getups(sample))
def readtoken(p):
    """Read the next token from *p*, an object offering peek() and accept().

    Returns ``None`` when no input is left. Otherwise returns a pair
    ``(is_word, text)``: ``(True, letters)`` for a maximal run of alphabetic
    characters, or ``(False, other)`` for a maximal run of non-alphabetic
    ones.
    """
    if not p.peek():
        return None

    def take(alphabetic):
        # Consume characters while their isalpha() result equals *alphabetic*.
        taken = []
        while (ch := p.peek()) and str.isalpha(ch) == alphabetic:
            p.accept()
            taken.append(ch)
        return "".join(taken)

    word = take(True)
    if word:
        return (True, word)
    return (False, take(False))
# XML prolog and opening <speak> element placed at the start of every document.
# NOTE(review): xml:lang is "sk-SK", presumably to select a voice whose sounds
# suit Esperanto -- confirm before changing it.
ssmlhdr = """
<?xml version="1.0" encoding="UTF-8"?>
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sk-SK">
""".lstrip()


def getssml(text):
    """Convert *text* into a complete SSML document and return it as a string.

    Every run of letters is wrapped in a ``<phoneme alphabet="ups">`` element
    whose ``ph`` attribute comes from ``getups``. Everything else
    (spaces, digits, punctuation) is copied through with ``&``, ``<`` and
    ``>`` escaped, so arbitrary input cannot produce malformed XML. The
    result always ends with a line containing ``</speak>``.
    """
    # Imported here so the function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    parts = [ssmlhdr]
    p = Parser(text)
    while (t := readtoken(p)):
        alpha, token = t
        if alpha:
            # Letter-only tokens and phoneme codes contain no XML
            # metacharacters, so they can be inserted as they are.
            parts.append(
                '<phoneme alphabet="ups" ph="%s">%s</phoneme>'
                % (getups(token), token)
            )
        else:
            parts.append(escape(token))
    ssml = "".join(parts)
    if not ssml.endswith("\n"):
        ssml += "\n"
    return ssml + "</speak>"
# Disabled demo: change the condition to True to print a sample SSML document.
if False:
    print("# getssml")
    text = "Mi estas tre ĝoja konatiĝi kun vi."
    for line in (text, "-" * 32, getssml(text)):
        print(line)
import getopt, sys
# Short-option specification for getopt: "-f" takes an argument.
options = "f:"


def usage():
    """Print the command-line synopsis and terminate with exit status 1.

    Raises ``SystemExit`` and therefore never returns.
    """
    print("usage: %s -f file | text ..." % sys.argv[0])
    # sys.exit rather than the exit() builtin: the latter is injected by the
    # ``site`` module and is missing under ``python -S`` or in frozen builds.
    sys.exit(1)
# Command-line entry point: take the text from "-f file" or, failing that,
# from the remaining arguments joined by spaces, and print its SSML form.
if __name__ == "__main__":
    try:
        parsed, rest = getopt.getopt(sys.argv[1:], options)
    except getopt.GetoptError as err:
        print(err)
        usage()
    text = None
    for flag, value in parsed:
        if flag != "-f":
            continue
        with open(value, encoding="utf-8") as fh:
            text = fh.read()
    # A missing or empty file falls back to the positional arguments.
    text = text or " ".join(rest)
    if not text:
        usage()
    print(getssml(text))
@7shi
Copy link
Author

7shi commented May 16, 2020

このスクリプトは以下の記事で解説しています。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment