Skip to content

Instantly share code, notes, and snippets.

@7shi
Last active October 18, 2022 04:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 7shi/49fca97c08eb06edb6bed423502c784e to your computer and use it in GitHub Desktop.
Save 7shi/49fca97c08eb06edb6bed423502c784e to your computer and use it in GitHub Desktop.
[py] SSML converter for Ido
# CC0 http://creativecommons.org/publicdomain/zero/1.0/
# Spelling unit (one or two letters) -> phoneme symbol.
phonemes = {}
# First letter of every multi-letter spelling unit; used like a set,
# the stored value is always 1.
phonemes2 = {}


def setphonemes(phs):
    """Register spelling/phoneme pairs in the module tables.

    *phs* is a whitespace-separated list of ``spelling,SYMBOL`` items.
    Each pair is stored in ``phonemes``; when the spelling is longer
    than one letter its first letter is also recorded in ``phonemes2``.
    """
    for entry in phs.split():
        spelling, symbol = entry.split(",")
        phonemes[spelling] = symbol
        if len(spelling) > 1:
            phonemes2[spelling[0]] = 1


setphonemes("a,A b,B c,TS ch,CH d,D e,E f,F g,G h,H")
setphonemes("i,I j,ZH k,K l,L m,M n,N o,O p,P qu,KW")
setphonemes("r,R s,S sh,SH t,T u,U v,V w,W x,KS y,J z,Z")
# Sample words printed by the disabled debug snippets in this file.
tests = [
    "mashino",
    "aquo",
    "linguo",
    "patro",
    "strato",
    "serchar",
    "familio",
    "dio",
    "manuo",
    "frua",
]
def getph(ch):
    """Return the phoneme symbol for spelling unit *ch*, or "" if unknown."""
    return phonemes.get(ch, "")
class Parser:
    """Reader with one item of lookahead over any iterable.

    End of input is reported as ``None``.  The lookahead slot counts as
    filled only while it holds a truthy value, so a falsy item (such as
    an empty string) is replaced by the following item on the next
    ``peek``.
    """

    def __init__(self, src):
        self.i = iter(src)   # underlying iterator
        self.cur = None      # lookahead slot

    def peek(self):
        """Return the upcoming item without consuming it."""
        if not self.cur:
            # Slot is empty (or falsy): pull the next item, keeping the
            # slot unchanged once the iterator is exhausted.
            self.cur = next(self.i, self.cur)
        return self.cur

    def read(self):
        """Return the upcoming item and consume it."""
        item, self.cur = self.peek(), None
        return item

    def accept(self):
        """Discard the buffered item, if any, and pre-load the next one."""
        if not self.cur:
            return
        self.cur = None
        self.peek()
def getphoneme(word):
    """Translate *word* into a list of phoneme symbols.

    Letters are read left to right.  When a letter can start a
    two-letter spelling unit (it is a key of ``phonemes2``) and the pair
    formed with the following letter is a key of ``phonemes``, both
    letters are consumed as one unit.  Units without a mapping produce
    an empty string.
    """
    reader = Parser(word)
    symbols = []
    while True:
        unit = reader.read()
        if not unit:
            break
        if unit in phonemes2:
            following = reader.peek()
            if following and unit + following in phonemes:
                unit += reader.read()
        symbols.append(getph(unit))
    return symbols
# Disabled debug snippet: change the condition to True to print the
# phoneme list of every sample word.
if False:
    print("# getphoneme")
    for word in tests:
        print(word, "->", getphoneme(word))
def isconsonant(ph):
    """Tell whether phoneme symbol *ph* is a consonant.

    A falsy *ph* (``None`` or ``""``) is returned unchanged; otherwise
    the result is True unless the symbol starts with A, E, I, O or U.
    """
    if not ph:
        return ph
    return ph[0] not in "AEIOU"
def syllablize(phs):
    """Split a list of phoneme symbols into syllables.

    The phonemes are scanned from the end of the word towards the
    beginning.  Returns a list of syllables, each a list of phoneme
    symbols in their original order.
    """
    p = Parser(reversed(phs))
    ret = []
    cur = []
    while (ph := p.read()):
        cur.insert(0, ph)
        if isconsonant(ph):
            # Consonant after the vowel: keep scanning towards the vowel.
            continue
        # ph is a vowel; attach the consonant in front of it, if any.
        c1 = p.peek()
        if isconsonant(c1):
            p.accept()
            cur.insert(0, c1)
            c2 = p.peek()
            # A second consonant is attached only in front of L or R,
            # and only when it differs from it (e.g. T+R in "patro").
            if isconsonant(c2) and c1 in "LR" and c2 != c1:
                p.accept()
                cur.insert(0, c2)
        ret.insert(0, cur)
        cur = []
    if cur:
        # Consonants left over at the start of the word have no vowel of
        # their own: prepend them to the first syllable.
        if ret:
            ret[0] = cur + ret[0]
        else:
            ret = [cur]
    # Diphthong: with three or more syllables, a final syllable that
    # starts with a vowel is merged into a preceding one ending in I or U.
    if len(ret) >= 3 and not isconsonant(ret[-1][0]) and ret[-2][-1] in "IU":
        last = ret.pop()
        ret[-1] += last
    return ret
# Disabled debug snippet: change the condition to True to print the
# syllable split of every sample word.
if False:
    print("# syllablize")
    for word in tests:
        print(word, "->", syllablize(getphoneme(word)))
def setaccent(syls):
    """Mark the stressed syllable of a syllabified word.

    *syls* is a list of syllables (lists of phoneme symbols).  Words of
    fewer than two syllables are left untouched.  Otherwise the stressed
    syllable is replaced by a copy prefixed with the ``"s1"`` marker:

    * infinitives, recognised by a final syllable ending in -ar, -ir or
      -or, are stressed on the last syllable;
    * every other word is stressed on the second-to-last syllable, and
      ``"lng"`` is appended to that syllable when it ends in a vowel.

    The list is modified in place and also returned.
    """
    if len(syls) >= 2:
        last = syls[-1]
        # Infinitive endings: present -ar, past -ir, future -or.
        if len(last) >= 2 and last[-1] == "R" and last[-2] in ("A", "I", "O"):
            syls[-1] = ["s1"] + last
        else:
            syls[-2] = ["s1"] + syls[-2]
            if not isconsonant(syls[-2][-1]):
                syls[-2].append("lng")
    return syls
# Disabled debug snippet: change the condition to True to print the
# accented syllables of every sample word.
if False:
    print("# setaccent")
    for word in tests:
        print(word, "->", setaccent(syllablize(getphoneme(word))))
def combine(syls):
    """Flatten syllables into one string.

    Phonemes inside a syllable are separated by a space, syllables by
    ``" . "``.
    """
    return " . ".join(" ".join(syllable) for syllable in syls)
def getups(word):
    """Return the phoneme string for *word* (case-insensitive).

    The word is lowercased, converted to phonemes, split into syllables,
    accented and joined; afterwards W is rewritten as U and the compound
    symbols KU and KS are split into two space-separated symbols.
    """
    syllables = setaccent(syllablize(getphoneme(word.lower())))
    ups = combine(syllables)
    # Order matters: "KW" first becomes "KU", which is then split.
    for old, new in (("W", "U"), ("KU", "K U"), ("KS", "K S")):
        ups = ups.replace(old, new)
    return ups
# Disabled debug snippet: change the condition to True to print the
# final phoneme string of every sample word.
if False:
    print("# getups")
    for word in tests:
        print(word, "->", getups(word))
def readtoken(p):
    """Read the next token from parser *p*.

    Returns ``None`` at end of input.  Otherwise returns a tuple
    ``(is_word, text)``: a run of letters and apostrophes gives
    ``(True, text)``; failing that, a run of non-letter characters gives
    ``(False, text)``.
    """
    if not p.peek():
        return None

    collected = []

    # Word token: alphabetic characters, apostrophes included.
    while True:
        ch = p.peek()
        if ch != "'" and not (ch and str.isalpha(ch)):
            break
        p.accept()
        collected.append(ch)
    if collected:
        return (True, "".join(collected))

    # Separator token: everything up to the next alphabetic character.
    while True:
        ch = p.peek()
        if not ch or str.isalpha(ch):
            break
        p.accept()
        collected.append(ch)
    return (False, "".join(collected))
# Opening of every generated SSML document: XML declaration plus the
# <speak> start tag, each terminated by a newline.
ssmlhdr = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sk-SK">\n'
)
def getssml(text):
    """Convert plain *text* into a complete SSML document string.

    Every word token is wrapped in a ``<phoneme alphabet="ups">``
    element whose ``ph`` attribute carries the pronunciation computed by
    ``getups``.  Text between words is copied through with the XML
    special characters ``&``, ``<`` and ``>`` escaped, so arbitrary
    input cannot produce malformed markup.  The result starts with
    ``ssmlhdr`` and ends with ``</speak>`` on its own line.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    parts = [ssmlhdr]
    p = Parser(text)
    while (t := readtoken(p)):
        alpha, token = t
        if alpha:
            # Word tokens hold only letters and apostrophes, which need
            # no escaping inside element content.
            parts.append('<phoneme alphabet="ups" ph="%s">%s</phoneme>'
                         % (getups(token), token))
        else:
            parts.append(escape(token))
    ssml = "".join(parts)
    if not ssml.endswith("\n"):
        ssml += "\n"
    return ssml + '</speak>'
# Disabled debug snippet: change the condition to True to print a sample
# sentence, a separator line and the SSML generated for it.
if False:
    print("# getssml")
    text = "L'amiko serchas la familio."
    print(text)
    print("-" * 32)
    print(getssml(text))
import getopt, sys
# Short-option spec for getopt: "-f" takes an argument.
options = "f:"


def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    print("usage: %s -f file | text ..." % sys.argv[0])
    # sys.exit is always available; the bare exit() helper is installed
    # by the site module and is meant for interactive sessions.
    sys.exit(1)
if __name__ == "__main__":
    # Command-line entry point: the input comes from the file named by
    # -f, or else from the remaining arguments joined with spaces.
    text = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], options)
    except getopt.GetoptError as e:
        print(e)
        usage()
    for opt, optarg in opts:
        if opt != "-f":
            continue
        with open(optarg, encoding="utf-8") as f:
            text = f.read()
    # An empty or missing file falls back to the positional arguments.
    text = text or " ".join(args)
    if not text:
        usage()
    print(getssml(text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment