# jcboyd/tts_translate.py

## tts_translate.py
import contextlib
import cStringIO
import io
import os
import wave

import requests


# Acquire credentials at https://www.ibm.com/watson/services/text-to-speech/
# They are read from the environment so that secrets do not have to be typed
# into (and committed with) this file; each falls back to '' when unset.
user_name = os.environ.get('WATSON_TTS_USERNAME', '')
password = os.environ.get('WATSON_TTS_PASSWORD', '')

# Endpoint that create_audio() sends its synthesis requests to.
url = 'https://stream.watsonplatform.net/text-to-speech/api/v1/synthesize'


def create_audio(text, voice):
    """Request speech audio for ``text`` from the text-to-speech service.

    Issues a GET request to the module-level ``url`` with HTTP basic auth
    built from ``user_name`` and ``password``, asking for ``audio/wav``.

    Args:
        text: The text to be spoken. ``requests`` URL-encodes query
            parameters itself, so the text needs no manual escaping.
        voice: Name of the voice to use, e.g. ``'fr-FR_ReneeVoice'``.

    Returns:
        An in-memory binary buffer (``io.BytesIO``) holding the response
        body, positioned at the start.

    Raises:
        requests.exceptions.HTTPError: If the service replies with a
            4xx/5xx status.
    """
    args = {'accept': 'audio/wav', 'text': text, 'voice': voice}
    r = requests.get(url, auth=(user_name, password), params=args)

    # Fail here on an error reply; otherwise the error body would be wrapped
    # as if it were WAV data and only blow up later inside wave.open().
    r.raise_for_status()

    # io.BytesIO exists on both Python 2 and 3 (cStringIO is Python 2 only)
    # and offers the same getvalue()/read() interface for binary data.
    return io.BytesIO(r.content)

def _parse_vocab(lines):
    """Parse '<french> - <english>' lines into a list of (french, english)."""
    vocab = []
    for line_no, line in enumerate(lines, 1):
        line = line.strip()
        if not line:
            # Tolerate blank lines (typically a trailing newline at EOF).
            continue
        # Split on the first separator only, so a later ' - ' stays part of
        # the English side instead of breaking the pair.
        parts = [part.strip() for part in line.split(' - ', 1)]
        if len(parts) != 2:
            raise ValueError(
                "line %d is not of the form 'french - english': %r"
                % (line_no, line))
        vocab.append((parts[0], parts[1]))
    return vocab


def _read_wav(buf):
    """Return (params, raw frame bytes) for the WAV data held in ``buf``."""
    # The byte length of the whole payload is a safe upper bound on its
    # number of frames. It is used instead of the frame count in the WAV
    # header, which the original author found made wave allocate ~2 GB.
    max_frames = len(buf.getvalue())
    with contextlib.closing(wave.open(buf, 'rb')) as reader:
        return reader.getparams(), reader.readframes(max_frames)


def _write_joined_wav(first, second, out_path):
    """Write the audio of ``first`` followed by ``second`` to ``out_path``."""
    first_params, first_frames = _read_wav(first)
    second_params, second_frames = _read_wav(second)

    # Both clips are written under a single header, so their channel count,
    # sample width and frame rate have to agree.
    if tuple(first_params[:3]) != tuple(second_params[:3]):
        raise ValueError(
            'cannot join clips with different audio formats: %r vs %r'
            % (tuple(first_params[:3]), tuple(second_params[:3])))

    nchannels, sampwidth = first_params[0], first_params[1]
    nframes = (len(first_frames) + len(second_frames)) // (nchannels * sampwidth)
    # Replace only the frame count; keep the remaining fields of the first clip.
    params = tuple(first_params[:3]) + (nframes,) + tuple(first_params[4:])

    with contextlib.closing(wave.open(out_path, 'wb')) as output:
        output.setparams(params)
        output.writeframes(first_frames)
        output.writeframes(second_frames)


def main(input_file='/Users/jcboyd/Dropbox/vocab_list.txt',
         output_dir='/Users/jcboyd/Dropbox/vocab_audio'):
    """Create one WAV file per vocabulary entry: French, then English.

    The vocab list is assumed to be a text file with the format:
        [french word/s 1] - [english word/s 1]
        [french word/s 2] - [english word/s 2]
        [french word/s 3] - [english word/s 3]
        ...
    Blank lines are ignored.

    Args:
        input_file: Path of the vocab list to read.
        output_dir: Directory that receives ``0.wav``, ``1.wav``, ... (one
            file per entry, numbered in file order). Created if missing.

    Raises:
        ValueError: If a non-blank line lacks the ' - ' separator, or the
            two synthesized clips of an entry differ in audio format.
    """
    with open(input_file) as f:
        vocab = _parse_vocab(f)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for i, (french, english) in enumerate(vocab):

        print('Writing %s...' % french)

        # The text is passed as-is: create_audio() hands it to requests via
        # `params`, which URL-encodes it. Escaping spaces as '%20' first
        # would be encoded a second time and reach the service literally.
        fr = create_audio(french, 'fr-FR_ReneeVoice')
        en = create_audio(english, 'en-GB_KateVoice')

        _write_joined_wav(fr, en, os.path.join(output_dir, '%d.wav' % i))


# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()