Skip to content

Instantly share code, notes, and snippets.

@masaponto

masaponto/dtalk.py

Last active Mar 9, 2018
Embed
What would you like to do?
#!/usr/bin/env python
import os
import subprocess
from xml.sax.saxutils import escape, quoteattr

import ffmpeg
import requests
class DTALK:
    """Text-to-speech client for the docomo aiTalk ``textToSpeech`` endpoint.

    The audio returned by the service is written to ``tmp_dir``, converted to
    a WAV file with ffmpeg and played back with the external ``aplay``
    command, which therefore has to be installed and on ``PATH``.
    """

    # Voice parameters that a caller-supplied ``param_dict`` must contain.
    _REQUIRED_PARAMS = ('speaker', 'rate', 'pitch', 'range', 'volume')

    def __init__(self, key, param_dict=None, tmp_dir="./tmp"):
        """Store the API key, the voice parameters and the scratch directory.

        Args:
            key: API key appended to the request URL.
            param_dict: Optional mapping with the string-valued keys
                ``speaker``, ``rate``, ``pitch``, ``range`` and ``volume``.
                When omitted (or empty) a built-in default voice is used.
            tmp_dir: Directory that receives the intermediate ``raw_file``
                and ``output.wav`` files. It is created on demand.

        Raises:
            ValueError: If ``param_dict`` lacks a required key or one of the
                required values is not a ``str``.
        """
        self.key = key
        self.url = "https://api.apigw.smt.docomo.ne.jp/aiTalk/v1/textToSpeech?APIKEY=" + key
        self.tmp_dir = tmp_dir
        if param_dict:
            for name in self._REQUIRED_PARAMS:
                if name not in param_dict:
                    raise ValueError(
                        "The element " + name + " was not found. It must be included.")
                if not isinstance(param_dict[name], str):
                    raise ValueError("It must be string")
            self.param_dict = param_dict
        else:
            self.param_dict = {
                'speaker': 'sumire',
                'pitch': '1.2',
                'range': '1',
                'rate': '1.3',
                'volume': '2.0',
            }

    def _tmp_path(self, filename):
        """Return the path of ``filename`` inside the scratch directory."""
        return os.path.join(self.tmp_dir, filename)

    def _ensure_tmp_dir(self):
        """Create the scratch directory if it does not exist yet."""
        # An empty tmp_dir means "current directory"; makedirs("") would fail.
        if self.tmp_dir:
            os.makedirs(self.tmp_dir, exist_ok=True)

    def generate_xml(self, text):
        """Build the SSML request body for ``text``.

        The text and all voice parameters are XML-escaped so that characters
        such as ``&``, ``<`` or quotes cannot produce a malformed document.

        Args:
            text: Plain text to be spoken.

        Returns:
            The SSML document encoded as UTF-8 ``bytes``.
        """
        params = self.param_dict
        voice = '<voice name=' + quoteattr(params["speaker"]) + '>'
        prosody = (
            '<prosody rate=' + quoteattr(params["rate"])
            + ' pitch=' + quoteattr(params["pitch"])
            + ' range=' + quoteattr(params["range"])
            + ' volume=' + quoteattr(params["volume"]) + '>'
        )
        xml = (
            '<?xml version="1.0" encoding="utf-8" ?>'
            + '<speak version="1.1">' + voice + prosody + escape(text)
            + '</prosody></voice></speak>'
        )
        return xml.encode("UTF-8")

    def pcm2wav(self, pcm_path):
        """Convert a raw PCM file to ``output.wav`` in the scratch directory.

        The input is read as signed 16-bit big-endian, 16 kHz, mono audio.
        An existing ``output.wav`` is overwritten.

        Args:
            pcm_path: Path of the raw PCM file to convert.
        """
        self._ensure_tmp_dir()
        stream = ffmpeg.input(pcm_path, f="s16be", ar="16000", ac="1")
        stream = ffmpeg.output(stream, self._tmp_path('output.wav'), loglevel=0)
        stream = ffmpeg.overwrite_output(stream)
        ffmpeg.run(stream)

    def get_wav(self, text):
        """Request synthesised speech for ``text`` from the service.

        Despite the name, the response is requested as ``audio/L16`` and is
        returned unmodified; use :meth:`pcm2wav` to obtain a WAV file.

        Args:
            text: Text to speak. A built-in fallback sentence is used when
                it is empty or ``None``.

        Returns:
            The response body as ``bytes``, or ``None`` when the service did
            not answer with HTTP status 200.
        """
        if not text:
            text = "ちょっと何言っているかわからないですね!"
        xml = self.generate_xml(text)
        response = requests.post(
            self.url,
            data=xml,
            headers={
                'Content-Type': 'application/ssml+xml',
                'Accept': 'audio/L16',
                # xml is already bytes, so len() is the encoded body size.
                'Content-Length': str(len(xml)),
            })
        if response.status_code != 200:
            return None
        return response.content

    def talk(self, text):
        """Synthesise ``text`` and play it through ``aplay``.

        Nothing is played when the service returns no audio.

        Args:
            text: Text to speak.
        """
        content = self.get_wav(text)
        if not content:
            return
        self._ensure_tmp_dir()
        raw_path = self._tmp_path("raw_file")
        with open(raw_path, 'wb') as f:
            f.write(content)
        self.pcm2wav(raw_path)
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed to aplay verbatim.
        subprocess.call(["aplay", "-q", self._tmp_path("output.wav")])
def main():
    """Speak a sample Japanese sentence with a custom voice configuration."""
    # Replace the placeholder with a real API key before running.
    api_key = "<your-api-key-goes-here>"
    voice_settings = dict(
        speaker='sumire',
        pitch='1',
        range='1',
        rate='2.0',
        volume='2.0',
    )
    sentence = "今日はいい天気です。お腹が空きましたか?"
    speaker = DTALK(key=api_key, param_dict=voice_settings)
    speaker.talk(sentence)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment