Skip to content

Instantly share code, notes, and snippets.

@reuben
Last active November 7, 2023 06:30
Show Gist options
  • Save reuben/5f483772864c1ac9687ba1c7aa3cb11f to your computer and use it in GitHub Desktop.
Save reuben/5f483772864c1ac9687ba1c7aa3cb11f to your computer and use it in GitHub Desktop.
"""
Example of how to synthesize speech using the Coqui Studio API.
Streams the download/playback of the audio.
Usage:
$ COQUI_API_TOKEN="put your API token here" python coqui_api_stream.py --text "Hi there!"
To specify the voice to use, pass e.g. `--voice 98d4af7d-aca0-4a70-a26e-4ca59023a248`.
To save the audio to a file after playback, pass `--save_dest audio.wav`. Audio will be
written to the specified file path.
To use the V1 model instead of XTTS, pass `--model v1`.
"""
import argparse
import shutil
import subprocess
import requests
import os
from typing import Iterator
def is_installed(lib_name: str) -> bool:
    """Report whether an executable called ``lib_name`` can be located.

    Args:
        lib_name: Name of the executable to look up with ``shutil.which``.

    Returns:
        ``True`` when ``shutil.which`` resolves the name, ``False`` otherwise.
    """
    return shutil.which(lib_name) is not None
def play(audio: bytes) -> None:
    """Play a complete audio buffer through ``ffplay`` and wait for it to exit.

    Args:
        audio: Encoded audio bytes, fed to ffplay on its standard input.

    Raises:
        ValueError: If the ``ffplay`` executable cannot be found.
    """
    if not is_installed("ffplay"):
        message = (
            "ffplay from ffmpeg not found, necessary to play audio. "
            "On mac you can install it with 'brew install ffmpeg'. "
            "On linux and windows you can install it from https://ffmpeg.org/"
        )
        raise ValueError(message)

    # "-" tells ffplay to read its input from stdin.
    args = ["ffplay", "-autoexit", "-", "-nodisp"]
    # The player's output was never used, so discard it rather than buffering
    # it in memory. run(input=...) feeds stdin and waits for the process, which
    # is what the previous Popen/communicate/poll sequence did by hand.
    subprocess.run(
        args,
        input=audio,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
def save(audio: bytes, filename: str) -> None:
    """Write ``audio`` to ``filename`` in binary mode, replacing any existing file.

    Args:
        audio: Raw bytes to store.
        filename: Destination path.
    """
    with open(filename, mode="wb") as sink:
        sink.write(audio)
def stream(audio_stream: Iterator[bytes]) -> bytes:
    """Pipe audio chunks into ``mpv`` as they arrive and return all of them.

    Each chunk is written to the player's stdin and flushed immediately so
    playback can start before the iterator is exhausted.

    Args:
        audio_stream: Iterator yielding audio chunks; ``None`` items are skipped.

    Returns:
        The concatenation of every chunk received from ``audio_stream``.

    Raises:
        ValueError: If the ``mpv`` executable cannot be found.
    """
    if not is_installed("mpv"):
        message = (
            "mpv not found, necessary to stream audio. "
            "On mac you can install it with 'brew install mpv'. "
            "On linux and windows you can install it from https://mpv.io/"
        )
        raise ValueError(message)

    mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
    mpv_process = subprocess.Popen(
        mpv_command,
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    player_stdin = mpv_process.stdin
    # Collect chunks in a list and join once at the end; repeated ``bytes +=``
    # copies the whole buffer on every iteration.
    received = []
    try:
        for chunk in audio_stream:
            if chunk is None:
                continue
            received.append(chunk)
            if player_stdin is None:
                continue
            try:
                player_stdin.write(chunk)
                player_stdin.flush()
            except BrokenPipeError:
                # The player exited early. Stop feeding it but keep consuming
                # the stream so the caller still gets the complete audio.
                player_stdin = None
    finally:
        # Always close the pipe and reap the player, even if the iterator
        # raised, so no mpv process is left waiting on stdin.
        if mpv_process.stdin:
            try:
                mpv_process.stdin.close()
            except BrokenPipeError:
                # Closing flushes buffered data; the player is already gone.
                pass
        mpv_process.wait()
    return b"".join(received)
# Read the API token from the environment once, at import time, and fail
# immediately with an actionable message when it is not set.
try:
    COQUI_API_TOKEN = os.environ["COQUI_API_TOKEN"]
except KeyError:
    # ``from None`` suppresses the chained KeyError so the user sees only the
    # instruction below.
    raise RuntimeError(
        "Set COQUI_API_TOKEN environment variable to your API key"
    ) from None
def tts(text: str, voice_id: str, model: str = "xtts") -> Iterator[bytes]:
    """Request synthesized speech and yield the WAV response in chunks.

    Args:
        text: Text to synthesize.
        voice_id: ID of the voice, sent as ``voice_id`` in the request body.
        model: ``"xtts"`` selects the XTTS render endpoint; any other value
            selects the ``/samples`` endpoint.

    Yields:
        Non-empty chunks of the response body, up to 2048 bytes each.

    Raises:
        requests.HTTPError: If the API answers with an error status, so the
            error payload is not mistaken for audio.
    """
    if model == "xtts":
        url = "https://app.coqui.ai/api/v2/samples/xtts/render/?format=wav"
    else:
        url = "https://app.coqui.ai/api/v2/samples?format=wav"

    # stream=True defers downloading the body until iter_content() is consumed;
    # without it the whole response is fetched before the first chunk is
    # yielded. The context manager releases the connection when the generator
    # finishes or is closed.
    with requests.post(
        url,
        json={"text": text, "voice_id": voice_id},
        headers={"Authorization": f"Bearer {COQUI_API_TOKEN}"},
        stream=True,
    ) as res:
        res.raise_for_status()
        for chunk in res.iter_content(chunk_size=2048):
            if chunk:
                yield chunk
if __name__ == "__main__":
    # Command-line entry point: synthesize --text, play it while it downloads,
    # and optionally write the received audio to --save_dest afterwards.
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", required=True, help="Text to synthesize")
    parser.add_argument(
        "--voice",
        help="ID of the voice to use for synthesis",
        default="98d4af7d-aca0-4a70-a26e-4ca59023a248",
    )
    parser.add_argument(
        "--save_dest", help="Optional path to save audio file to, after playback."
    )
    parser.add_argument(
        "--model",
        help="Which model to use for synthesis (V1 or XTTS)",
        choices=["v1", "xtts"],
        # Without a default argparse yields None, which tts() treats as
        # "not xtts" and routes to the V1 endpoint. XTTS is the documented
        # default, so make it explicit.
        default="xtts",
    )
    args = parser.parse_args()
    text = args.text
    voice_id = args.voice
    save_dest = args.save_dest

    audio = stream(tts(text, voice_id, args.model))
    if save_dest:
        save(audio, save_dest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment