Created
September 16, 2018 03:41
-
-
Save Ending2015a/24951808e947348ef9a7d5bdb659ae9f to your computer and use it in GitHub Desktop.
A simple text-to-speech script to recite English vocabulary & Chinese definition using Microsoft SAPI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires pypiwin32; install it first with: pip install pypiwin32
import win32com.client | |
import re | |
import os | |
# Settings read by get_wordsets() and create_wav().
config = {
    # Separator between consecutive vocabulary entries in the input text.
    'delimiter': '\n\n',
    # Number of capture groups taken from each matched entry.
    'group': 3,
    # group 0 = Vocab / group 1 = Chinese Definition / group 2 = English Definition
    # Raw string so that \s and \S reach the regex engine instead of being
    # treated as (invalid) string escape sequences.
    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
    # Voice index used for each group (position = group index).
    'voice': [1, 0, 1],
    # Value assigned to the speaker's Volume property.
    'volume': 100,
    # Index of the group that is spelled out letter by letter for 's' steps.
    'spelling': 0,
    # Order in which items are spoken for every word:
    # 0 = Vocab / 1 = Chinese Def / 2 = English Def / 's' = spelling
    'sequence': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
    # Speaking rate for each step of 'sequence' (looked up by step index,
    # so it needs at least as many entries as 'sequence').
    'speed': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
}
'''
To check which voices are installed on your system:
---
speaker = win32com.client.Dispatch('SAPI.SpVoice')
voices = speaker.GetVoices()
print('there are ', voices.Count, ' kinds of voices in your system')
for i in range(voices.Count):
    print(i, ':', voices.Item(i).GetDescription())
---
For reference, my system has 2 voices:
0 : Microsoft Hanhan Desktop - Chinese (Taiwan)
1 : Microsoft Zira Desktop - English (United States)
'''
# Path of the WAV file that will be written.
output_filename = 'english_vocabulary_001.wav'

# content
# format: [Eng Vocab] \t [Chi Def] \n [remain... Eng Def]
# Entries are separated by one blank line, matching config['delimiter'].
text = u'''insolent\t粗魯無禮的、傲慢的
adj. extremely rude, esp in expressing contempt

conspicuous\t顯著的、顯而易見的
adj. easily seen; noticeable; remarkable

distortion\t扭曲、曲解
n. distorting or being distorted
'''
# split vocab & def | |
def get_wordsets(text, config=config):
    """Split *text* into entries and extract the regex groups of each entry.

    The text is split on ``config['delimiter']``; every piece is matched
    against ``config['pattern']`` and the first ``config['group']`` capture
    groups are collected into a list.  Pieces that do not match (or that do
    not provide enough groups) are reported on stdout and skipped.

    The compiled pattern is stored in ``config['pat_comp']`` as a side
    effect.

    Returns:
        A tuple ``(word_sets, config)`` where ``word_sets`` is a list of
        lists of strings, one inner list per matched entry.
    """
    pattern = re.compile(config['pattern'])
    config['pat_comp'] = pattern  # kept in config for callers that read it

    words = text.split(config['delimiter'])
    print('Total: ', len(words), 'words')

    # Capture groups are numbered from 1 in the re module.
    group_numbers = range(1, config['group'] + 1)

    word_sets = []
    for w in words:
        m = pattern.match(w)
        if m is None:
            print('no match: ' + w)
            continue
        try:
            wset = [m.group(n) for n in group_numbers]
        except IndexError:
            # The pattern defines fewer groups than config['group'] asks for.
            print('no match: ' + w)
            continue
        word_sets.append(wset)
    return word_sets, config
# text-to-speech | |
def create_wav(filename, word_sets, config=config):
    """Recite every word set through SAPI and record the speech to *filename*.

    For each word set, the steps listed in ``config['sequence']`` are spoken
    in order: an integer step speaks that group of the word set, and ``'s'``
    speaks the letters of group ``config['spelling']`` one by one.  The voice
    for a step comes from ``config['voice']`` (indexed by group) and the rate
    from ``config['speed']`` (indexed by step position).

    An existing file at *filename* is removed before recording.  The file
    stream is closed even if speaking fails part-way.

    Args:
        filename: Path of the WAV file to create.
        word_sets: List of per-word group lists, as produced by
            get_wordsets().
        config: Settings dict; reads 'volume', 'spelling', 'sequence',
            'voice' and 'speed'.
    """
    stream = win32com.client.Dispatch('SAPI.SpFileStream')  # file stream
    speaker = win32com.client.Dispatch('SAPI.SpVoice')  # text-to-speech handle

    if os.path.isfile(filename):  # remove a previous recording
        print('file existed: ', filename)
        os.remove(filename)

    stream.Open(filename, 3)  # 3 = SSFMCreateForWrite
    try:
        speaker.AudioOutputStream = stream  # send speech to the file stream
        speaker.Volume = config['volume']
        voices = speaker.GetVoices()

        def speak_once(text, volume, rate, voice):
            """Speak *text* with the given volume, rate and voice index."""
            speaker.Voice = voices.Item(voice)
            speaker.Volume = volume
            speaker.Rate = rate
            speaker.Speak(text)
            speaker.WaitUntilDone(-1)  # -1 = no timeout

        # Print the available voices.
        print('Voices: ', voices.Count)
        for i in range(voices.Count):
            print(i, ': ' + voices.Item(i).GetDescription())

        for w in word_sets:
            # Letters separated by commas so they are read out one at a time.
            spell = ', '.join(w[config['spelling']])
            for idx, s in enumerate(config['sequence']):
                is_spelling = s == 's'
                # A spelling step uses the voice of the group being spelled.
                group = config['spelling'] if is_spelling else s
                # Trailing comma: intended as a short halt after each item.
                speak_word = (spell if is_spelling else w[s]) + ', '
                speak_once(
                    speak_word,
                    config['volume'],
                    config['speed'][idx],
                    config['voice'][group],
                )
        speaker.WaitUntilDone(-1)
    finally:
        stream.Close()  # always release the output file
# Parse the sample text into word sets and record them to the output file.
word_sets, _ = get_wordsets(text)
create_wav(output_filename, word_sets, config)
# To convert the WAV file to MP3, you can use ffmpeg with a command such as:
# ffmpeg -i "input.wav" -vn -ar 44100 -ac 2 -ab 192k -af "volume=1.5" -f mp3 "output.mp3"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment