Ending2015a/speak_gtts.py

## speak_gtts.py


import gtts
from gtts import gTTS
import re
import os
from pydub import AudioSegment
from tqdm import tqdm, trange
import tempfile


def replace(sentence, re_list):
    """Apply every pattern -> replacement pair of *re_list* to *sentence*.

    Each key of *re_list* is treated as a regular-expression fragment and is
    only matched when it sits between two word boundaries.  The pairs are
    applied one after another in the dict's iteration order, each on the
    result of the previous substitution.

    Returns the rewritten sentence (the input unchanged if *re_list* is empty).
    """
    import re

    # The key is wrapped in a capturing group and fenced by word boundaries.
    template = u'(?<=\\b)({})(?=\\b)'

    result = sentence
    for pattern, substitute in re_list.items():
        result = re.sub(template.format(pattern), substitute, result)
    return result

# Replacement tables handed to replace(): one dict per captured field of a
# word entry.  Keys are regular-expression fragments, values are the text
# substituted for them before the field is sent to the speech engine.
abbreviation = [
    {},  # group 1: the English word itself, spoken unchanged

    {   # group 2: Chinese definition
        # Raw string: '\.' is not a valid Python string escape, so the
        # backslashes must reach the regex engine untouched.
        r'\.\.\.': u'點點點',
        'adj': u'形容詞',
        'v': u'動詞',
        'n': u'名詞',
        'adv': u'副詞',
        'conj': u'連接詞',
    },

    {   # group 3: English definition
        'sth': 'something',
        'sb': 'somebody',
        'esp': 'especially',
        'etc': 'etcetera',
        'v': 'verb',
        'adj': 'adjective',
        'adv': 'adverb',
        'n': 'noun',
        '[\\u4e00-\\u9fff]+': '',  # drop runs of Chinese characters
        'fml': 'formal',
        'infml': 'informal',
        'usu': 'usually',
        'derog': 'derogatory',  # was misspelled 'derogtory'
        'conj': 'conjunction',
        'joc': 'jocular',
        'eg': 'for example',
        'vt': 'transitive verb',
    },
]

# Settings shared by get_wordsets() and create_mp3().
config = {
    # Separator between two word entries in the input text.
    'delimiter': '\n\n',
    # Number of capture groups get_wordsets() reads from each match.
    'group': 3,
    # Entry layout: <word> TAB <first definition line> [NEWLINE <remainder>].
    # Raw string so that \s and \S reach the regex engine instead of being
    # (invalid) Python string escapes; \t and \n are resolved by the regex
    # engine to the same characters as before.
    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
    # Language codes passed to gTTS; indexed by 'voice_index_each_opts'.
    'voices': ['zh-TW', 'en'],
    # Order in which the prepared clips are concatenated for every word;
    # each item is a key of 'operations'.
    'sequence_opts': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
    #'sequence_opts': [0, 's', 1, 2],
    # 0=no / 1=yes, one flag per 'sequence_opts' item.
    # NOTE(review): no code visible in this file reads 'speak_slow'.
    'speak_slow': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    # Text to speak for each clip, computed from one [word, zh, en] entry.
    'operations': {
        0: lambda group: replace(group[0], abbreviation[0]),   # English words
        1: lambda group: replace(group[1], abbreviation[1]),   # Chinese Definition
        2: lambda group: replace(group[2], abbreviation[2]),   # English Definition
        's': lambda group: ', '.join(group[0]),                # Spelling
    },
    # Index into 'voices' used for each 'operations' key.
    'voice_index_each_opts': {
        0: 1,
        1: 0,
        2: 1,
        's': 1,
    },
    # Number of words per output part.
    'set': 100,
}

# Source word list read by the main procedure.
input_text_filename = 'mason1000.txt'

# Per-part output names; {_part}, {_start} and {_end} are filled in by the
# main procedure through str.format().
output_text_filename = 'mason1000-part{_part}({_start}~{_end})-gtts.txt'
output_mp3_filename = 'mason1000-part{_part}({_start}~{_end})-gtts-short-ver.mp3'

# Tag template for the exported MP3; 'title' and 'track' contain a {_part}
# placeholder that is formatted once per part.
mp3_tag = dict(
    artist='Mason',
    album='Mason 1000',
    date='2018',
    genre='English',
    title='Mason 1000 - part{_part}',
    track='{_part}',
)

# Bitrate string handed to the MP3 export (constant bitrate).
mp3_bitrate = "192k"


def get_wordsets(text, config=config):
    """Split *text* into word entries and parse each one with the configured regex.

    Args:
        text: Full contents of the word list.
        config: Settings dict; 'delimiter', 'pattern' and 'group' are read.
            The compiled pattern is stored back under 'pat_comp', so the
            dict passed in is modified.

    Returns:
        A tuple ``(word_sets, config)`` where ``word_sets`` is a list with one
        ``[group1, ..., groupN]`` list per entry that matched the pattern.
        Entries that do not match are reported on stdout and skipped.
    """
    pattern = re.compile(config['pattern'])
    config['pat_comp'] = pattern

    words = text.split(config['delimiter'])  # split each word
    print('Total: ', len(words), 'words')

    word_sets = []

    for w in words:
        m = pattern.match(w)   # match pattern
        try:
            # split word & definitions
            wset = [m.group(n + 1) for n in range(config['group'])]
        except (AttributeError, IndexError):
            # AttributeError: no match at all (m is None).
            # IndexError: the pattern has fewer groups than config['group'].
            # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
            print('no match: ' + w)
            continue
        word_sets.append(wset)

    return word_sets, config


def create_mp3(filename, word_sets, config=config, tags=mp3_tag):
    """Synthesize every entry of *word_sets* with gTTS and export one MP3.

    For each entry, one clip is prepared per key of ``config['operations']``;
    the clips are then concatenated in the order of
    ``config['sequence_opts']``, each preceded by 300 ms of silence.  The
    result ends with 2 s of silence and is exported with the module-level
    ``mp3_bitrate``.

    Args:
        filename: Output path; an existing file of that name is removed first.
        word_sets: Parsed entries as produced by get_wordsets().
        config: Settings dict; 'voices', 'operations',
            'voice_index_each_opts' and 'sequence_opts' are read.
        tags: Tag dict passed to the MP3 export.
    """
    # Function-scope import: the file does not import time at module level.
    import time

    voices = config['voices']

    if os.path.isfile(filename):
        print('file existed: ', filename)
        os.remove(filename)

    def download_voice(text, voice, fh):
        """Write the gTTS audio for *text* into *fh* and rewind it.

        Retries forever on gTTSError, sleeping 5 s between attempts.
        """
        # Pause for one second on every third download.
        download_voice.counter += 1
        if download_voice.counter == 3:
            time.sleep(1)
            # Was `== 0`, a comparison with no effect: the counter never
            # reset, so the pause happened only once.
            download_voice.counter = 0

        retry = 1
        while True:
            # Start each attempt from an empty buffer so audio written by a
            # failed attempt cannot end up in front of the retried clip.
            fh.seek(0)
            fh.truncate()
            try:
                tts = gTTS(text, lang=voice)
                tts.write_to_fp(fh)
            except gtts.tts.gTTSError as e:
                print('Get gtts error: ', e)
                print('Retry: ', retry)
                retry += 1

                time.sleep(5)  # if failed, sleep and try again
                continue
            fh.seek(0)
            break
    download_voice.counter = 0

    def prepare_voice(text, voice):
        """Return the audio for *text*, spoken line by line with voices[voice]."""
        sound = AudioSegment.silent(duration=0)

        # Empty lines are skipped; an empty text yields a zero-length clip.
        for t in [x for x in text.split('\n') if x != '']:
            with tempfile.TemporaryFile() as fh:
                download_voice(t, voices[voice], fh)
                sound = sound + AudioSegment.from_mp3(fh)

        return sound

    print('Voices: ', len(voices))
    for i, voice_name in enumerate(voices):
        print(i, ': ' + voice_name)

    tts_sequence = AudioSegment.silent(duration=10)

    for w in tqdm(word_sets):  # for each word

        # prepare speeches
        speaks = {}
        for group, opt in config['operations'].items():
            speaks[group] = prepare_voice(opt(w), config['voice_index_each_opts'][group])

        # synthesize
        for s in config['sequence_opts']:
            tts_sequence = tts_sequence + AudioSegment.silent(duration=300) + speaks[s]

    tts_sequence = tts_sequence + AudioSegment.silent(duration=2000)

    tts_sequence.export(filename, format='mp3', tags=tags, bitrate=mp3_bitrate)


def create_txt(filename, sets):
    """Write the entries of *sets* to *filename* as UTF-8 text with a BOM.

    Every entry is written as ``word<TAB>first definition``, followed by the
    third field on its own line when that field is not empty, and terminated
    by a blank line.

    Args:
        filename: Path of the text file to (over)write.
        sets: Sequence of entries indexable as [0], [1] and [2].
    """
    print('::Writing words to file: ', filename)
    with open(filename, 'w', encoding='utf-8-sig') as out:
        for entry in tqdm(sets, desc='words'):
            if entry[2] != '':
                line = '{}\t{}\n{}\n\n'.format(entry[0], entry[1], entry[2])
            else:
                line = '{}\t{}\n\n'.format(entry[0], entry[1])
            out.write(line)

# main procedure: read the word list, then write one text file and one MP3
# per group of config['set'] words.

with open(input_text_filename, "r", encoding='utf-8-sig') as content_file:
    text = content_file.read().replace('\r', '')

word_sets, _ = get_wordsets(text)

# Number of parts, rounded up so a final partial group gets its own part.
total_sets = (len(word_sets) + config['set'] - 1) // config['set']

for _track in trange(total_sets, desc='Parts'):

    # set parameters (0-based, end-exclusive slice bounds)
    _part = _track + 1
    _start = _track * config['set']
    _end = min(_start + config['set'], len(word_sets))
    sets = word_sets[_start:_end]

    # From here on _start is the 1-based number of the first word, which is
    # what the file-name templates display.
    _start += 1

    # The templates pull {_part}, {_start} and {_end} out of the module
    # namespace via locals().
    _tags = dict(mp3_tag,
                 title=mp3_tag['title'].format(**locals()),
                 track=mp3_tag['track'].format(**locals()))

    create_txt(output_text_filename.format(**locals()), sets)
    create_mp3(output_mp3_filename.format(**locals()), sets, config, tags=_tags)


	import gtts
	from gtts import gTTS
	import re
	import os
	from pydub import AudioSegment
	from tqdm import tqdm, trange
	import tempfile


	def replace(sentence, re_list):
	    """Apply every pattern -> replacement pair of *re_list* to *sentence*.

	    Each key of *re_list* is treated as a regular-expression fragment and
	    is only matched when it sits between two word boundaries.  The pairs
	    are applied one after another in the dict's iteration order.

	    Returns the rewritten sentence (unchanged if *re_list* is empty).
	    """
	    import re

	    # The key is wrapped in a capturing group and fenced by word boundaries.
	    template = u'(?<=\\b)({})(?=\\b)'

	    result = sentence
	    for pattern, substitute in re_list.items():
	        result = re.sub(template.format(pattern), substitute, result)
	    return result

	# Replacement tables handed to replace(): one dict per captured field of a
	# word entry.  Keys are regular-expression fragments, values are the text
	# substituted for them before the field is sent to the speech engine.
	abbreviation = [
	    {},  # group 1: the English word itself, spoken unchanged

	    {   # group 2: Chinese definition
	        # Raw string: '\.' is not a valid Python string escape, so the
	        # backslashes must reach the regex engine untouched.
	        r'\.\.\.': u'點點點',
	        'adj': u'形容詞',
	        'v': u'動詞',
	        'n': u'名詞',
	        'adv': u'副詞',
	        'conj': u'連接詞',
	    },

	    {   # group 3: English definition
	        'sth': 'something',
	        'sb': 'somebody',
	        'esp': 'especially',
	        'etc': 'etcetera',
	        'v': 'verb',
	        'adj': 'adjective',
	        'adv': 'adverb',
	        'n': 'noun',
	        '[\\u4e00-\\u9fff]+': '',  # drop runs of Chinese characters
	        'fml': 'formal',
	        'infml': 'informal',
	        'usu': 'usually',
	        'derog': 'derogatory',  # was misspelled 'derogtory'
	        'conj': 'conjunction',
	        'joc': 'jocular',
	        'eg': 'for example',
	        'vt': 'transitive verb',
	    },
	]

	# Settings shared by get_wordsets() and create_mp3().
	config = {
	    # Separator between two word entries in the input text.
	    'delimiter': '\n\n',
	    # Number of capture groups get_wordsets() reads from each match.
	    'group': 3,
	    # Entry layout: <word> TAB <first definition line> [NEWLINE <remainder>].
	    # Raw string so that \s and \S reach the regex engine instead of being
	    # (invalid) Python string escapes.
	    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
	    # Language codes passed to gTTS; indexed by 'voice_index_each_opts'.
	    'voices': ['zh-TW', 'en'],
	    # Order in which the prepared clips are concatenated for every word;
	    # each item is a key of 'operations'.
	    'sequence_opts': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
	    #'sequence_opts': [0, 's', 1, 2],
	    # 0=no / 1=yes, one flag per 'sequence_opts' item.
	    # NOTE(review): no code visible in this file reads 'speak_slow'.
	    'speak_slow': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	    # Text to speak for each clip, computed from one [word, zh, en] entry.
	    'operations': {
	        0: lambda group: replace(group[0], abbreviation[0]),   # English words
	        1: lambda group: replace(group[1], abbreviation[1]),   # Chinese Definition
	        2: lambda group: replace(group[2], abbreviation[2]),   # English Definition
	        's': lambda group: ', '.join(group[0]),                # Spelling
	    },
	    # Index into 'voices' used for each 'operations' key.
	    'voice_index_each_opts': {
	        0: 1,
	        1: 0,
	        2: 1,
	        's': 1,
	    },
	    # Number of words per output part.
	    'set': 100,
	}

	# Source word list read by the main procedure.
	input_text_filename = 'mason1000.txt'

	# Per-part output names; {_part}, {_start} and {_end} are filled in by the
	# main procedure through str.format().
	output_text_filename = 'mason1000-part{_part}({_start}~{_end})-gtts.txt'
	output_mp3_filename = 'mason1000-part{_part}({_start}~{_end})-gtts-short-ver.mp3'

	# Tag template for the exported MP3; 'title' and 'track' contain a {_part}
	# placeholder that is formatted once per part.
	mp3_tag = dict(
	    artist='Mason',
	    album='Mason 1000',
	    date='2018',
	    genre='English',
	    title='Mason 1000 - part{_part}',
	    track='{_part}',
	)

	# Bitrate string handed to the MP3 export (constant bitrate).
	mp3_bitrate = "192k"


	def get_wordsets(text, config=config):
	    """Split *text* into word entries and parse each one with the configured regex.

	    Args:
	        text: Full contents of the word list.
	        config: Settings dict; 'delimiter', 'pattern' and 'group' are read.
	            The compiled pattern is stored back under 'pat_comp', so the
	            dict passed in is modified.

	    Returns:
	        A tuple ``(word_sets, config)`` where ``word_sets`` is a list with
	        one ``[group1, ..., groupN]`` list per entry that matched the
	        pattern.  Entries that do not match are reported on stdout and
	        skipped.
	    """
	    pattern = re.compile(config['pattern'])
	    config['pat_comp'] = pattern

	    words = text.split(config['delimiter'])  # split each word
	    print('Total: ', len(words), 'words')

	    word_sets = []

	    for w in words:
	        m = pattern.match(w)   # match pattern
	        try:
	            # split word & definitions
	            wset = [m.group(n + 1) for n in range(config['group'])]
	        except (AttributeError, IndexError):
	            # AttributeError: no match at all (m is None).
	            # IndexError: the pattern has fewer groups than config['group'].
	            # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
	            print('no match: ' + w)
	            continue
	        word_sets.append(wset)

	    return word_sets, config



	def create_mp3(filename, word_sets, config=config, tags=mp3_tag):
	    """Synthesize every entry of *word_sets* with gTTS and export one MP3.

	    For each entry, one clip is prepared per key of
	    ``config['operations']``; the clips are then concatenated in the order
	    of ``config['sequence_opts']``, each preceded by 300 ms of silence.
	    The result ends with 2 s of silence and is exported with the
	    module-level ``mp3_bitrate``.

	    Args:
	        filename: Output path; an existing file of that name is removed first.
	        word_sets: Parsed entries as produced by get_wordsets().
	        config: Settings dict; 'voices', 'operations',
	            'voice_index_each_opts' and 'sequence_opts' are read.
	        tags: Tag dict passed to the MP3 export.
	    """
	    # Function-scope import: the file does not import time at module level.
	    import time

	    voices = config['voices']

	    if os.path.isfile(filename):
	        print('file existed: ', filename)
	        os.remove(filename)

	    def download_voice(text, voice, fh):
	        """Write the gTTS audio for *text* into *fh* and rewind it.

	        Retries forever on gTTSError, sleeping 5 s between attempts.
	        """
	        # Pause for one second on every third download.
	        download_voice.counter += 1
	        if download_voice.counter == 3:
	            time.sleep(1)
	            # Was `== 0`, a comparison with no effect: the counter never
	            # reset, so the pause happened only once.
	            download_voice.counter = 0

	        retry = 1
	        while True:
	            # Start each attempt from an empty buffer so audio written by
	            # a failed attempt cannot end up in front of the retried clip.
	            fh.seek(0)
	            fh.truncate()
	            try:
	                tts = gTTS(text, lang=voice)
	                tts.write_to_fp(fh)
	            except gtts.tts.gTTSError as e:
	                print('Get gtts error: ', e)
	                print('Retry: ', retry)
	                retry += 1

	                time.sleep(5)  # if failed, sleep and try again
	                continue
	            fh.seek(0)
	            break
	    download_voice.counter = 0

	    def prepare_voice(text, voice):
	        """Return the audio for *text*, spoken line by line with voices[voice]."""
	        sound = AudioSegment.silent(duration=0)

	        # Empty lines are skipped; an empty text yields a zero-length clip.
	        for t in [x for x in text.split('\n') if x != '']:
	            with tempfile.TemporaryFile() as fh:
	                download_voice(t, voices[voice], fh)
	                sound = sound + AudioSegment.from_mp3(fh)

	        return sound

	    print('Voices: ', len(voices))
	    for i, voice_name in enumerate(voices):
	        print(i, ': ' + voice_name)

	    tts_sequence = AudioSegment.silent(duration=10)

	    for w in tqdm(word_sets):  # for each word

	        # prepare speeches
	        speaks = {}
	        for group, opt in config['operations'].items():
	            speaks[group] = prepare_voice(opt(w), config['voice_index_each_opts'][group])

	        # synthesize
	        for s in config['sequence_opts']:
	            tts_sequence = tts_sequence + AudioSegment.silent(duration=300) + speaks[s]

	    tts_sequence = tts_sequence + AudioSegment.silent(duration=2000)

	    tts_sequence.export(filename, format='mp3', tags=tags, bitrate=mp3_bitrate)


	def create_txt(filename, sets):
	    """Write the entries of *sets* to *filename* as UTF-8 text with a BOM.

	    Every entry is written as ``word<TAB>first definition``, followed by
	    the third field on its own line when that field is not empty, and
	    terminated by a blank line.

	    Args:
	        filename: Path of the text file to (over)write.
	        sets: Sequence of entries indexable as [0], [1] and [2].
	    """
	    print('::Writing words to file: ', filename)
	    with open(filename, 'w', encoding='utf-8-sig') as f:
	        for w in tqdm(sets, desc='words'):
	            if w[2] == '':
	                f.write('{}\t{}\n\n'.format(w[0], w[1]))
	            else:
	                f.write('{}\t{}\n{}\n\n'.format(w[0], w[1], w[2]))

	# main procedure: read the word list, then write one text file and one MP3
	# per group of config['set'] words.

	with open(input_text_filename, "r", encoding='utf-8-sig') as content_file:
	    text = content_file.read().replace('\r', '')

	word_sets, _ = get_wordsets(text)

	# Number of parts, rounded up so a final partial group gets its own part.
	total_sets = (len(word_sets) + config['set'] - 1) // config['set']

	for _track in trange(total_sets, desc='Parts'):

	    # set parameters (0-based, end-exclusive slice bounds)
	    _part = _track + 1
	    _start = _track * config['set']
	    _end = min(_start + config['set'], len(word_sets))
	    sets = word_sets[_start:_end]

	    # From here on _start is the 1-based number of the first word, which
	    # is what the file-name templates display.
	    _start += 1

	    # The templates pull {_part}, {_start} and {_end} out of the module
	    # namespace via locals().
	    _tags = dict(mp3_tag)
	    _tags['title'] = _tags['title'].format(**locals())
	    _tags['track'] = _tags['track'].format(**locals())

	    create_txt(output_text_filename.format(**locals()), sets)
	    create_mp3(output_mp3_filename.format(**locals()), sets, config, tags=_tags)