Skip to content

Instantly share code, notes, and snippets.

@Ending2015a
Last active October 5, 2018 04:14
Show Gist options
  • Save Ending2015a/0931574a4be3d21648d6e19970cffb28 to your computer and use it in GitHub Desktop.
Save Ending2015a/0931574a4be3d21648d6e19970cffb28 to your computer and use it in GitHub Desktop.
A simple text-to-speech (TTS) script that recites English words together with their Chinese and English definitions, using gTTS (Google Text-to-Speech).
import gtts
from gtts import gTTS
import re
import os
from pydub import AudioSegment
from tqdm import tqdm, trange
import tempfile
def replace(sentence, re_list):
    """Apply every substitution in ``re_list`` to ``sentence`` and return it.

    Each key of ``re_list`` is a regular-expression fragment and each value
    is its replacement text.  A key only matches when it is delimited by
    word boundaries on both sides.  Substitutions run one after another in
    the mapping's iteration order, each on the output of the previous one.
    """
    import re

    # The fragment is wrapped in a group so alternations inside a key stay
    # enclosed between the two boundary assertions.
    bounded = u'(?<=\\b)({})(?=\\b)'

    result = sentence
    for fragment, substitute in re_list.items():
        matcher = re.compile(bounded.format(fragment))
        result = matcher.sub(substitute, result)
    return result
# Substitution tables, one per captured group of a word entry.  Keys are
# regular-expression fragments (not plain text) and values are the text that
# should be spoken in their place.  Regex keys containing backslashes are
# written so that no invalid string escape sequence is produced.
abbreviation = [
    # group 1: nothing is expanded
    {},
    # group 2: abbreviations read out in Chinese
    {
        r'\.\.\.': u'點點點',
        'adj': u'形容詞',
        'v': u'動詞',
        'n': u'名詞',
        'adv': u'副詞',
        'conj': u'連接詞',
    },
    # group 3: abbreviations read out in English
    {
        'sth': 'something',
        'sb': 'somebody',
        'esp': 'especially',
        'etc': 'etcetera',
        'v': 'verb',
        'adj': 'adjective',
        'adv': 'adverb',
        'n': 'noun',
        '[\\u4e00-\\u9fff]+': '',  # chinese words are removed entirely
        'fml': 'formal',
        'infml': 'informal',
        'usu': 'usually',
        'derog': 'derogatory',
        'conj': 'conjunction',
        'joc': 'jocular',
        'eg': 'for example',
        'vt': 'transitive verb',
    },
]
# Settings that drive parsing of the word list and assembly of the audio.
config = {
    # Separator between two word entries in the input text.
    'delimiter': '\n\n',
    # Number of regex groups captured from each entry.
    'group': 3,
    # Entry layout: <group 1> TAB <group 2> [NEWLINE <group 3, may span lines>].
    # Raw string so the regex escapes are not treated as string escapes.
    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
    # Language codes passed to gTTS; referenced by index below.
    'voices': ['zh-TW', 'en'],
    # Order in which the prepared speeches are played for every word; each
    # item is a key of 'operations'.
    'sequence_opts': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
    # Shorter alternative sequence:
    # 'sequence_opts': [0, 's', 1, 2],
    # NOTE(review): not read anywhere in the visible code -- presumably one
    # slow/normal flag per 'sequence_opts' item; confirm before relying on it.
    'speak_slow': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 0=no / 1=yes
    # Text producers: each receives the captured groups of one entry and
    # returns the text to speak.
    'operations': {
        0: lambda group: replace(group[0], abbreviation[0]),  # English words
        1: lambda group: replace(group[1], abbreviation[1]),  # Chinese Definition
        2: lambda group: replace(group[2], abbreviation[2]),  # English Definition
        's': lambda group: ', '.join(group[0]),  # Spelling, letter by letter
    },
    # Index into 'voices' used for each operation's output.
    'voice_index_each_opts': {
        0: 1,
        1: 0,
        2: 1,
        's': 1,
    },
    # Number of word entries per output part.
    'set': 100,
}
# Word list to read.
input_text_filename = 'mason1000.txt'

# Output name templates; the {_part}, {_start} and {_end} placeholders are
# filled in with str.format when each part is written.
output_text_filename = 'mason1000-part{_part}({_start}~{_end})-gtts.txt'
output_mp3_filename = 'mason1000-part{_part}({_start}~{_end})-gtts-short-ver.mp3'

# Tags written into every exported MP3; 'title' and 'track' are templates.
mp3_tag = dict(
    artist='Mason',
    album='Mason 1000',
    date='2018',
    genre='English',
    title='Mason 1000 - part{_part}',
    track='{_part}',
)

mp3_bitrate = "192k"  # constant bitrate
def get_wordsets(text, config=config):
    """Split ``text`` into word entries and parse each one into its groups.

    Args:
        text: Full content of the word list.
        config: Settings mapping.  Reads 'pattern', 'delimiter' and 'group';
            the compiled pattern is stored back under 'pat_comp'.

    Returns:
        A ``(word_sets, config)`` tuple.  ``word_sets`` holds, for every
        entry that matched the pattern, a list with the first
        ``config['group']`` captured groups.  ``config`` is the same mapping
        that was passed in.

    Raises:
        IndexError: if ``config['group']`` asks for more groups than the
            pattern defines.
    """
    config['pat_comp'] = re.compile(config['pattern'])
    entry_pattern = config['pat_comp']
    group_count = config['group']

    words = text.split(config['delimiter'])  # one chunk per word entry
    print('Total: ', len(words), 'words')

    word_sets = []
    for w in words:
        m = entry_pattern.match(w)
        if m is None:
            # Report and skip entries that do not follow the expected layout.
            # An explicit check (rather than a catch-all ``except``) keeps
            # unrelated errors and Ctrl-C from being silently swallowed.
            print('no match: ' + w)
            continue
        word_sets.append([m.group(n + 1) for n in range(group_count)])
    return word_sets, config
def create_mp3(filename, word_sets, config=config, tags=mp3_tag):
    """Synthesize the recitation of ``word_sets`` and export it as one MP3.

    For every word set, each function in ``config['operations']`` produces a
    text; the text is fetched line by line from gTTS in the voice selected by
    ``config['voice_index_each_opts']`` and the clips are then concatenated
    in ``config['sequence_opts']`` order.

    Args:
        filename: Output path.  An existing file at this path is removed.
        word_sets: Iterable of parsed word entries (one list of groups each).
        config: Settings mapping; reads 'voices', 'operations',
            'voice_index_each_opts' and 'sequence_opts'.
        tags: Tag mapping passed to the MP3 export.

    A failed gTTS request is retried indefinitely, waiting 5 seconds between
    attempts.
    """
    import time  # used only here; the file has no top-level import for it

    voices = config['voices']

    if os.path.isfile(filename):
        print('file existed: ', filename)
        os.remove(filename)

    request_count = 0

    def download_voice(text, voice, fh):
        """Fetch ``text`` spoken in language ``voice`` into file object ``fh``."""
        nonlocal request_count

        # Throttle: pause for a second on every third request.  The counter
        # must really be reset (assignment, not comparison), otherwise the
        # pause would only ever happen once.
        request_count += 1
        if request_count == 3:
            time.sleep(1)
            request_count = 0

        retry = 1
        while True:
            try:
                tts = gTTS(text, lang=voice)
                tts.write_to_fp(fh)
            except gtts.tts.gTTSError as e:
                print('Get gtts error: ', e)
                print('Retry: ', retry)
                retry += 1
                # Discard anything written before the failure so the next
                # attempt starts from an empty buffer.
                fh.seek(0)
                fh.truncate()
                time.sleep(5)  # if failed, sleep and try again
                continue
            fh.seek(0)  # rewind so the caller can read the audio back
            break

    def prepare_voice(text, voice):
        """Return the audio for ``text``, one TTS request per non-blank line."""
        sound = AudioSegment.silent(duration=0)
        for line in text.split('\n'):
            if not line.strip():
                continue  # nothing to speak on an empty or whitespace-only line
            with tempfile.TemporaryFile() as fh:
                download_voice(line, voices[voice], fh)
                sound = sound + AudioSegment.from_mp3(fh)
        return sound

    print('Voices: ', len(voices))
    for i, voice in enumerate(voices):
        print(i, ': ' + voice)

    tts_sequence = AudioSegment.silent(duration=10)
    for w in tqdm(word_sets):  # for each word
        # prepare speeches: one clip per configured operation
        speaks = {
            group: prepare_voice(opt(w), config['voice_index_each_opts'][group])
            for group, opt in config['operations'].items()
        }

        # synthesize: 300 ms of silence before every clip of the sequence
        for s in config['sequence_opts']:
            tts_sequence = tts_sequence + AudioSegment.silent(duration=300) + speaks[s]

        # 2 s of silence after each word
        tts_sequence = tts_sequence + AudioSegment.silent(duration=2000)

    tts_sequence.export(filename, format='mp3', tags=tags, bitrate=mp3_bitrate)
def create_txt(filename, sets):
    """Write the word entries in ``sets`` to ``filename`` as UTF-8 with BOM.

    Each entry is written as ``<item 0> TAB <item 1>`` on one line, followed
    by ``<item 2>`` on the next line when it is not empty, and then a blank
    line separating it from the next entry.
    """
    print('::Writing words to file: ', filename)
    with open(filename, 'w', encoding='utf-8-sig') as out:
        for entry in tqdm(sets, desc='words'):
            extra = '' if entry[2] == '' else '{}\n'.format(entry[2])
            headline = '{}\t{}\n'.format(entry[0], entry[1])
            out.write(headline + extra + '\n')
# main procedure
# Read the whole word list, dropping carriage returns so only '\n' remains.
with open(input_text_filename, "r", encoding='utf-8-sig') as content_file:
    text = content_file.read().replace('\r', '')

word_sets, _ = get_wordsets(text)

# Number of parts needed to hold all entries, config['set'] entries per part
# (rounded up).
total_sets = (len(word_sets) + config['set'] - 1) // config['set']

for _track in trange(total_sets, desc='Parts'):
    # set parameters
    _part = _track + 1
    _start = _track * config['set']
    _end = min(_start + config['set'], len(word_sets))
    sets = word_sets[_start:_end]

    # From here on _start is the 1-based number shown in the file names.
    _start += 1

    # Per-part copy of the tags with the templated fields filled in.
    _tags = dict(
        mp3_tag,
        title=mp3_tag['title'].format(**locals()),
        track=mp3_tag['track'].format(**locals()),
    )

    create_txt(output_text_filename.format(**locals()), sets)
    create_mp3(output_mp3_filename.format(**locals()), sets, config, tags=_tags)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment