Skip to content

Instantly share code, notes, and snippets.

@canard0328
Created April 15, 2018 11:02
Show Gist options
  • Save canard0328/59378a5b2805d3c34770f8af506ef417 to your computer and use it in GitHub Desktop.
Save canard0328/59378a5b2805d3c34770f8af506ef417 to your computer and use it in GitHub Desktop.
Synthesize text for English shadowing training using Google Cloud TTS (WaveNet)
# coding: utf-8
def synthesize_text(text, path, speaking_rate=0.8, silence_ms=1000):
    """Synthesize *text* with Google Cloud TTS and save it as ``path + '.mp3'``.

    The speech is requested as LINEAR16 audio using the ``en-US-Wavenet-D``
    voice, written to a temporary ``path + '.wav'`` file, padded with trailing
    silence, exported as MP3 and the temporary WAV file is removed again.
    Finally ``path`` is printed.

    Args:
        text: The text to be spoken.
        path: Output path *without* extension; ``.wav`` (temporary) and
            ``.mp3`` (result) are appended to it.
        speaking_rate: Value passed as ``speaking_rate`` in the audio config.
            Defaults to the original hard-coded 0.8.
        silence_ms: Duration in milliseconds of the silence appended after
            the speech. Defaults to the original hard-coded 1000.
    """
    import os
    from google.cloud import texttospeech
    from pydub import AudioSegment

    client = texttospeech.TextToSpeechClient()
    input_text = texttospeech.types.SynthesisInput(text=text)
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US', name='en-US-Wavenet-D')
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16,
        speaking_rate=speaking_rate)
    response = client.synthesize_speech(input_text, voice, audio_config)

    wav_path = path + '.wav'
    mp3_path = path + '.mp3'
    try:
        with open(wav_path, 'wb') as out:
            out.write(response.audio_content)
        audio = AudioSegment.from_wav(wav_path)
        audio += AudioSegment.silent(duration=silence_ms)
        # export() hands back the output file object; close it explicitly so
        # handles do not pile up when this is called once per script line.
        audio.export(mp3_path, format='mp3').close()
    finally:
        # Always clean up the intermediate WAV, even if decoding or the MP3
        # export failed part-way through.
        if os.path.exists(wav_path):
            os.remove(wav_path)
    print(path)
def parse_script_line(line):
    """Parse one ``<path><TAB><text>`` line of the script file.

    Trailing whitespace (including the newline) is stripped first. Only the
    first tab separates the fields, so the text itself may contain tabs.

    Args:
        line: A raw line read from the script file.

    Returns:
        A ``(path, text)`` tuple, or ``None`` if the line is blank.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    stripped = line.rstrip()
    if not stripped:
        return None
    path, sep, text = stripped.partition('\t')
    if not sep:
        raise ValueError(
            "expected '<path>\\t<text>', got: {!r}".format(line))
    return path, text


if __name__ == '__main__':
    # Each non-blank line of scripts.txt is "<output path>\t<text to speak>".
    with open('scripts.txt', 'r') as fi:
        for line in fi:
            entry = parse_script_line(line)
            if entry is None:
                # Blank lines (e.g. a trailing newline at EOF) are skipped
                # instead of aborting the whole run.
                continue
            path, text = entry
            synthesize_text(text, path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment