Skip to content

Instantly share code, notes, and snippets.

@aniline
Last active July 19, 2020 08:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aniline/b477bb2a6485401d66f6d49f09ed4c9a to your computer and use it in GitHub Desktop.
Save aniline/b477bb2a6485401d66f6d49f09ed4c9a to your computer and use it in GitHub Desktop.
WIP scriptlet for using the Google Cloud TTS API. For voice-over and stuff.
#!/usr/bin/env python3
#
# usage: tts.py [-h] [-f SPEC_FILE] [ssml [ssml ...]]
#
# positional arguments:
# ssml ssml message file
#
# optional arguments:
# -h, --help show this help message and exit
# -f SPEC_FILE, --spec-file SPEC_FILE
# Synthesis specification file(s)
#
# The 'SPEC_FILE' is a json file of the form.
# {
# "synth" : [
# { "file" : "message1.ssml" },
# { "file" : "message2.ssml" }
# ]
# }
#
# Tries to create message1.mp3, message2.mp3 relative to the folder where the json file is.
#
# The SSML files passed on the command line are used to synthesize audio with the same basename and a .mp3 extension.
#
import argparse
import json
import traceback
import sys
from pathlib import Path
from google.cloud import texttospeech
def render(client, voice, audio_config, input_text, output_file):
    """Synthesize SSML and write the resulting audio bytes to a file.

    Args:
        client: Object whose ``synthesize_speech(input=..., voice=...,
            audio_config=...)`` method is called to perform the synthesis.
        voice: Voice selection, passed through unchanged to the API call.
        audio_config: Audio configuration, passed through unchanged.
        input_text: SSML markup to synthesize.
        output_file: Path-like object providing ``open``; it is opened in
            binary write mode and receives ``response.audio_content``.
    """
    synthesis_input = texttospeech.SynthesisInput(ssml=input_text)
    response = client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config,
    )
    with output_file.open("wb") as out:
        out.write(response.audio_content)
    # Report the path exactly as written. Callers already pass a path that
    # carries its extension, so appending ".mp3" here named a file that
    # does not exist ("message1.mp3.mp3").
    print(f'Audio content written to file "{output_file}"')
def _load_spec(specfile):
    """Read a JSON spec file and return its list of synthesis work items.

    Each entry of the top-level ``"synth"`` list that has a ``"file"`` key
    yields ``{"file": <path>}``; each entry that has a ``"text"`` key yields
    ``{"text": ..., "output_file": <path>}`` (``"output_file"`` is omitted
    when the entry does not name one, so the caller can report it per item).
    Paths are resolved relative to the folder containing ``specfile``. All
    file items come first, followed by all text items.
    """
    specdir = Path(specfile).parent
    # Use a context manager so the handle is closed even if parsing fails.
    with open(specfile, encoding="utf-8") as spec_fp:
        synth = json.load(spec_fp)["synth"]

    items = [{"file": specdir.joinpath(i["file"])} for i in synth if "file" in i]
    for entry in synth:
        if "text" not in entry:
            continue
        item = {"text": entry["text"]}
        if "output_file" in entry:
            item["output_file"] = specdir.joinpath(entry["output_file"])
        items.append(item)
    return items


def process(specfile, ssmls):
    """Synthesize audio for every item in the spec file and every SSML file.

    Args:
        specfile: Path of the JSON specification file, or ``None`` when no
            spec file was given. A spec file that cannot be read or parsed
            is reported and skipped; the SSML files are still processed.
        ssmls: Iterable of SSML file paths. Each is rendered to a file with
            the same name and a ``.mp3`` suffix.
    """
    spec = []
    # No spec file is a normal invocation, not an error worth reporting.
    if specfile is not None:
        try:
            spec = _load_spec(specfile)
            print(f"Processing file {specfile}")
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: a missing, malformed, or mis-shaped spec file
            # must not prevent the command-line SSML files from rendering.
            print(f"Skipping spec file {specfile}: {e}")
    spec.extend({"file": Path(ssml)} for ssml in ssmls)
    print(spec)

    # Nothing to do: avoid constructing an API client for no work.
    if not spec:
        return

    client = texttospeech.TextToSpeechClient()
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-IN",
        name="en-IN-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=0.9,
    )

    for k in spec:
        if "file" in k:
            input_file = k["file"]
            input_text = input_file.read_text()
            output_file = input_file.with_suffix(".mp3")
            print(f"Input {input_file}, output {output_file}")
            render(client, voice, audio_config, input_text, output_file)
        elif "text" in k:
            if "output_file" not in k:
                # Inline text has no source filename to derive a name from.
                print("Not output filename for text item:")
                print(k["text"])
                continue
            output_file = Path(k["output_file"]).with_suffix(".mp3")
            input_text = k["text"]
            print(f"Input <text>, output {output_file}")
            render(client, voice, audio_config, input_text, output_file)
def setup_arguments(parser):
    """Register this tool's command-line arguments on ``parser``.

    Adds the optional ``-f``/``--spec-file`` option and the positional
    ``ssml`` argument, which accepts zero or more values.
    """
    argument_table = (
        (("-f", "--spec-file"), {"help": "Synthesis specification file(s)"}),
        (("ssml",), {"nargs": "*", "help": "ssml message file"}),
    )
    for flags, options in argument_table:
        parser.add_argument(*flags, **options)
def main():
    """Parse the command line and run the synthesis.

    Returns:
        ``1`` when processing raised an exception (the error and its
        traceback are printed first); otherwise ``None``, which
        ``sys.exit`` treats as success.
    """
    # Kept from the original: the parsed namespace is published as a
    # module-level name.
    global args
    parser = argparse.ArgumentParser()
    setup_arguments(parser)
    # argparse reports bad usage by raising SystemExit, which the handler
    # below does not catch, so parsing does not need to sit in the try.
    args = parser.parse_args()
    try:
        process(args.spec_file, list(args.ssml))
    except Exception as e:
        print(f"{e.__class__.__name__}:", ', '.join([str(a) for a in e.args]))
        # print_tb() expects a traceback object; handing it the exception
        # itself raised AttributeError inside this handler.
        traceback.print_tb(e.__traceback__)
        return 1
# Run the CLI only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment