Last active
October 5, 2018 04:14
-
-
Save Ending2015a/0931574a4be3d21648d6e19970cffb28 to your computer and use it in GitHub Desktop.
A simple text-to-speech (TTS) script that recites English words together with their Chinese and English definitions, using gTTS (Google Text-to-Speech).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gtts | |
from gtts import gTTS | |
import re | |
import os | |
from pydub import AudioSegment | |
from tqdm import tqdm, trange | |
import tempfile | |
def replace(sentence, re_list):
    """Expand every abbreviation found in ``sentence``.

    ``re_list`` maps regular-expression fragments to replacement text. The
    fragments are applied one after another, in the mapping's iteration
    order, and each one only matches where it is delimited by word
    boundaries on both sides.

    Returns the sentence with all substitutions applied.
    """
    import re

    result = sentence
    for fragment in re_list:
        # Wrap the fragment so that it cannot match inside a longer word.
        bounded = r'(?<=\b)({})(?=\b)'.format(fragment)
        result = re.sub(bounded, re_list[fragment], result)
    return result
# Abbreviation tables, one per captured group of an entry (same index as the
# operations in ``config``). Every key is a regular-expression fragment that
# replace() matches between word boundaries; every value is the text that is
# spoken in its place.
abbreviation = [
    # group 1: nothing is expanded
    {},
    # group 2: part-of-speech tags are read out in Chinese
    {
        # Raw string: '\.' is an invalid escape sequence in a normal literal.
        r'\.\.\.': u'點點點',
        'adj': u'形容詞',
        'v': u'動詞',
        'n': u'名詞',
        'adv': u'副詞',
        'conj': u'連接詞',
    },
    # group 3: dictionary shorthand is expanded to full English words
    {
        'sth': 'something',
        'sb': 'somebody',
        'esp': 'especially',
        'etc': 'etcetera',
        'v': 'verb',
        'adj': 'adjective',
        'adv': 'adverb',
        'n': 'noun',
        '[\\u4e00-\\u9fff]+': '',  # chinese words
        'fml': 'formal',
        'infml': 'informal',
        'usu': 'usually',
        'derog': 'derogatory',  # was misspelled 'derogtory'
        'conj': 'conjunction',
        'joc': 'jocular',
        'eg': 'for example',
        'vt': 'transitive verb',
    },
]
# Settings shared by get_wordsets(), create_mp3() and the main procedure.
config = {
    # Text that separates two entries in the input file.
    'delimiter': '\n\n',
    # Number of capture groups read from 'pattern' for every entry.
    'group': 3,
    # NOTE: \t and \n below are real tab/newline characters, while \\s and
    # \\S are handed to the regex engine. They are escaped explicitly because
    # '\s' is an invalid escape sequence in a normal string literal.
    'pattern': '([^\t]+)\t([^\n]+)\n?([\\s\\S]*)',
    # Language codes handed to gTTS; referenced by index below.
    'voices': ['zh-TW', 'en'],
    # Order in which the prepared speeches are appended for every word; each
    # item is a key of 'operations'.
    'sequence_opts': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
    # Shorter alternative: 'sequence_opts': [0, 's', 1, 2],
    # 0=no / 1=yes. NOTE(review): not referenced by the code shown here.
    'speak_slow': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    # Turn the captured groups of one entry into the text to be spoken.
    'operations': {
        0: lambda group: replace(group[0], abbreviation[0]),  # English words
        1: lambda group: replace(group[1], abbreviation[1]),  # Chinese Definition
        2: lambda group: replace(group[2], abbreviation[2]),  # English Definition
        's': lambda group: ', '.join(group[0]),  # Spelling
    },
    # Index into 'voices' used for each operation.
    'voice_index_each_opts': {
        0: 1,
        1: 0,
        2: 1,
        's': 1,
    },
    # Number of words written to each output part.
    'set': 100,
}
# Input word list.
input_text_filename = 'mason1000.txt'

# Output filename templates; the {_part}, {_start} and {_end} placeholders are
# filled in by the main procedure for every part.
output_text_filename = 'mason1000-part{_part}({_start}~{_end})-gtts.txt'
output_mp3_filename = 'mason1000-part{_part}({_start}~{_end})-gtts-short-ver.mp3'

# Tags written into every exported MP3; 'title' and 'track' are templates too.
mp3_tag = dict(
    artist='Mason',
    album='Mason 1000',
    date='2018',
    genre='English',
    title='Mason 1000 - part{_part}',
    track='{_part}',
)

mp3_bitrate = "192k"  # constant bitrate
def get_wordsets(text, config=config):
    """Split ``text`` into entries and extract the groups of each one.

    ``text`` is cut at ``config['delimiter']``; every piece is matched
    against ``config['pattern']`` and its first ``config['group']`` capture
    groups become one word set. Pieces that do not match are reported on
    stdout and skipped.

    Returns ``(word_sets, config)``. The compiled pattern is stored in
    ``config['pat_comp']``, so the dict passed in (by default the module-level
    ``config``) is modified in place.
    """
    pattern = re.compile(config['pattern'])
    config['pat_comp'] = pattern

    entries = text.split(config['delimiter'])  # split each word
    print('Total: ', len(entries), 'words')

    group_numbers = range(1, config['group'] + 1)
    word_sets = []
    for entry in entries:
        m = pattern.match(entry)  # match pattern
        if m is None:
            print('no match: ' + entry)  # if word no match
            continue
        try:
            # split word & definitions
            word_sets.append([m.group(n) for n in group_numbers])
        except IndexError:
            # config['group'] asks for more groups than the pattern defines;
            # keep the best-effort behaviour and skip the entry.
            print('no match: ' + entry)
    return word_sets, config
def create_mp3(filename, word_sets, config=config, tags=mp3_tag):
    """Synthesize all ``word_sets`` with gTTS and export them as one MP3.

    For every word set, each entry of ``config['operations']`` produces the
    text to speak, which is fetched with the voice selected through
    ``config['voice_index_each_opts']`` and ``config['voices']``. The speeches
    are then appended in the order given by ``config['sequence_opts']``, each
    preceded by 300 ms of silence, with 2000 ms of silence after every word.

    An existing ``filename`` is removed first. The result is exported with
    ``tags`` and the module-level ``mp3_bitrate``.
    """
    import time

    voices = config['voices']

    if os.path.isfile(filename):
        print('file existed: ', filename)
        os.remove(filename)

    def download_voice(text, voice, fh):
        """Write the speech for ``text`` to ``fh``, retrying on gTTS errors."""
        # Throttle: pause for one second on every third request.
        download_voice.counter += 1
        if download_voice.counter == 3:
            time.sleep(1)
            # The original wrote `== 0` here, a comparison with no effect, so
            # the counter never reset and the pause happened only once.
            download_voice.counter = 0

        retry = 1
        while True:
            # Start from an empty file so that a failed attempt cannot leave
            # partial data in front of the next one.
            fh.seek(0)
            fh.truncate()
            try:
                tts = gTTS(text, lang=voice)
                tts.write_to_fp(fh)
                fh.seek(0)
            except gtts.tts.gTTSError as e:
                print('Get gtts error: ', e)
                print('Retry: ', retry)
                retry += 1
                time.sleep(5)  # if failed, sleep and try again
                continue
            break

    download_voice.counter = 0

    def prepare_voice(text, voice):
        """Return the audio for ``text``, one request per non-empty line."""
        sound = AudioSegment.silent(duration=0)
        for line in text.split('\n'):
            if line == '':
                continue
            with tempfile.TemporaryFile() as fh:
                download_voice(line, voices[voice], fh)
                sound = sound + AudioSegment.from_mp3(fh)
        return sound

    print('Voices: ', len(voices))
    for i, voice in enumerate(voices):
        print(i, ': ' + voice)

    tts_sequence = AudioSegment.silent(duration=10)
    for w in tqdm(word_sets):  # for each word
        # prepare speeches: each operation is fetched once and reused below
        speaks = {}
        for group, opt in config['operations'].items():
            speaks[group] = prepare_voice(opt(w), config['voice_index_each_opts'][group])

        # synthesize
        for s in config['sequence_opts']:
            tts_sequence = tts_sequence + AudioSegment.silent(duration=300) + speaks[s]
        tts_sequence = tts_sequence + AudioSegment.silent(duration=2000)

    tts_sequence.export(filename, format='mp3', tags=tags, bitrate=mp3_bitrate)
def create_txt(filename, sets):
    """Write ``sets`` to ``filename`` as tab-separated text entries.

    Every item of ``sets`` provides three fields. The first two are written
    on one line separated by a tab; the third, when it is not empty, follows
    on the next line. Each entry ends with a blank line. The file is written
    as UTF-8 with a BOM.
    """
    print('::Writing words to file: ', filename)
    with open(filename, 'w', encoding='utf-8-sig') as out:
        for entry in tqdm(sets, desc='words'):
            extra = entry[2]
            if extra == '':
                record = '{}\t{}\n\n'.format(entry[0], entry[1])
            else:
                record = '{}\t{}\n{}\n\n'.format(entry[0], entry[1], extra)
            out.write(record)
# main procedure
# Read the whole word list, dropping carriage returns.
with open(input_text_filename, "r", encoding='utf-8-sig') as content_file:
    text = content_file.read().replace('\r', '')

word_sets, _ = get_wordsets(text)

# Number of output parts: len(word_sets) / config['set'], rounded up.
total_sets = (len(word_sets) + config['set'] - 1) // config['set']

for _track in trange(total_sets, desc='Parts'):
    # set parameters
    # NOTE: _part, _start and _end are looked up by name by the filename and
    # tag templates through format(**locals()); do not rename them.
    _part = _track + 1
    _start = _track * config['set']
    _end = min(_start + config['set'], len(word_sets))
    sets = word_sets[_start:_end]
    _start += 1  # the templates show a 1-based start index

    _tags = dict(mp3_tag)
    _tags.update(
        title=mp3_tag['title'].format(**locals()),
        track=mp3_tag['track'].format(**locals()),
    )

    create_txt(output_text_filename.format(**locals()), sets)
    create_mp3(output_mp3_filename.format(**locals()), sets, config, tags=_tags)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment