# Ending2015a/speak.py

## speak.py
# Requires pypiwin32; install it first with: pip install pypiwin32
import win32com.client
import re
import os

# Settings shared by get_wordsets() and create_wav().
config = {
    'delimiter': '\n\n',  # separator between vocabulary entries (a blank line)
    'group': 3,           # number of regex capture groups collected per entry
    # Raw string so that \s and \S reach the regex engine verbatim instead of
    # being parsed as (invalid) Python string escapes; `re` itself still reads
    # \t and \n as tab and newline, so the pattern matches exactly as before.
    # group 0 = Vocab / group 1 = Chinese Definition / group 2 = English Definition
    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
    'voice': [1, 0, 1],   # voice index used for each group
    'volume': 100,
    'spelling': 0,        # the group that is spelled out for an 's' step
    # Playback order: 0 = Vocab / 1 = Chinese Def / 2 = English Def / 's' = spelling
    'sequence': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
    # Speech rate for each step of 'sequence' (looked up by the step's position)
    'speed':    [0, 0,   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0],
}

'''
if you want to check how many kinds of voices your system has:

---
speaker = win32com.client.Dispatch('SAPI.SpVoice')
voices = speaker.GetVoices()
print('there are ', voices.Count, ' kinds of voices in your system')
for i in range(voices.Count):
    print(i, ': ' + voices.Item(i).GetDescription())
---

BTW, my system has 2 kinds of voices
0 : Microsoft Hanhan Desktop - Chinese (Taiwan)
1 : Microsoft Zira Desktop - English (United States)
'''

# Name of the WAV file passed to create_wav() at the bottom of the script.
output_filename = 'english_vocabulary_001.wav'

# Vocabulary content to be spoken.
# Entry format:  [Eng Vocab] \t [Chi Def] \n [remaining lines: Eng Def]
# Entries are separated from each other by a blank line (config['delimiter']).
text = u'''insolent\t粗魯無禮的、傲慢的
adj. extremely rude, esp in expressing contempt

conspicuous\t顯著的、顯而易見的
adj. easily seen; noticeable; remarkable

distortion\t扭曲、曲解
n. distorting or being distorted
'''

# split vocab & def
def get_wordsets(text, config=config):
    """Split *text* into vocabulary entries and parse each entry.

    The text is cut on ``config['delimiter']``; each piece is matched from
    its start against ``config['pattern']`` and capture groups
    ``1 .. config['group']`` are collected as one word set.  Pieces that do
    not match are reported on stdout and skipped.

    Side effect: the compiled pattern is stored in ``config['pat_comp']``.

    Args:
        text: the raw vocabulary text.
        config: settings dict; reads 'pattern', 'delimiter' and 'group'.

    Returns:
        A tuple ``(word_sets, config)``; ``word_sets`` holds one list of
        captured strings per matched entry.

    Raises:
        IndexError: if ``config['group']`` exceeds the number of capture
            groups in the pattern (a configuration error, no longer hidden
            behind a "no match" message).
    """
    pattern = re.compile(config['pattern'])
    config['pat_comp'] = pattern  # kept on the config, as callers may read it

    words = text.split(config['delimiter'])  # one chunk per vocabulary entry
    print('Total: ', len(words), 'words')

    group_numbers = range(1, config['group'] + 1)  # group 0 is the whole match
    word_sets = []
    for w in words:
        m = pattern.match(w)
        if m is None:
            # Explicit check instead of a bare ``except``: only a failed match
            # is treated as "no match"; interrupts and real bugs propagate.
            print('no match: ' + w)
            continue
        word_sets.append([m.group(n) for n in group_numbers])
    return word_sets, config

# text-to-speech
def create_wav(filename, word_sets, config=config):
    """Speak every word set into a WAV file through the Windows SAPI engine.

    For each word set, the steps of ``config['sequence']`` are spoken in
    order.  A numeric step speaks that group of the word set; the step
    ``'s'`` speaks the letters of group ``config['spelling']`` one by one.
    The voice comes from ``config['voice']`` (indexed by group) and the rate
    from ``config['speed']`` (indexed by the step's position).

    Args:
        filename: path of the WAV file to write; an existing file is
            removed first.
        word_sets: list of word sets as returned by get_wordsets().
        config: settings dict; reads 'volume', 'spelling', 'sequence',
            'speed' and 'voice'.
    """
    stream = win32com.client.Dispatch('SAPI.SpFileStream')  # output file stream
    speaker = win32com.client.Dispatch('SAPI.SpVoice')      # text-to-speech handle

    if os.path.isfile(filename):  # remove a previous output file
        print('file existed: ', filename)
        os.remove(filename)
    stream.Open(filename, 3)  # create & write file / 3 = SSFMCreateForWrite

    # From here on the stream is open: make sure it is closed even if a
    # speech call or a config lookup raises.
    try:
        speaker.AudioOutputStream = stream  # route speech into the file
        speaker.Volume = config['volume']
        voices = speaker.GetVoices()

        def speak_once(text, volume, rate, voice):
            """Speak *text* with the given volume, rate and voice index."""
            speaker.Voice = voices.Item(voice)
            speaker.Volume = volume
            speaker.Rate = rate
            speaker.Speak(text)
            speaker.WaitUntilDone(-1)  # -1 = wait without a timeout limit

        # print out voices info
        print('Voices: ', voices.Count)
        for i in range(voices.Count):
            print(i, ': ' + voices.Item(i).GetDescription())

        spelling_group = config['spelling']
        volume = config['volume']
        for w in word_sets:  # for each word
            # letters of the spelled group, separated by ', '
            spell = ', '.join(w[spelling_group])

            for idx, s in enumerate(config['sequence']):
                is_spelling = s == 's'
                # spelling is spoken with the voice of the spelled group
                group = spelling_group if is_spelling else s
                # trailing ', ' marks a halt after each item (per the original note)
                speak_word = (spell if is_spelling else w[s]) + ', '
                speak_once(speak_word, volume,
                           config['speed'][idx], config['voice'][group])

        speaker.WaitUntilDone(-1)
    finally:
        stream.Close()  # always release the output file

# Parse the sample text, then speak every parsed entry into the WAV file.
# The second item returned by get_wordsets (the config) is not needed here.
word_sets, _ = get_wordsets(text)
create_wav(output_filename, word_sets, config)
# To convert the resulting WAV file to mp3, ffmpeg can be used
# with a command such as the following:
#  ffmpeg -i "input.wav" -vn -ar 44100 -ac 2 -ab 192k -af "volume=1.5" -f mp3 "output.mp3"
	# you should install pypiwin32 first: pip install pypiwin32
	import win32com.client
	import re
	import os

	# Settings shared by get_wordsets() and create_wav().
	config = {
	    'delimiter': '\n\n',  # separator between vocabulary entries (a blank line)
	    'group': 3,           # number of regex capture groups collected per entry
	    # Raw string so that \s and \S reach the regex engine verbatim instead of
	    # being parsed as (invalid) Python string escapes; `re` itself still reads
	    # \t and \n as tab and newline, so the pattern matches exactly as before.
	    # group 0 = Vocab / group 1 = Chinese Definition / group 2 = English Definition
	    'pattern': r'([^\t]+)\t([^\n]+)\n?([\s\S]*)',
	    'voice': [1, 0, 1],   # voice index used for each group
	    'volume': 100,
	    'spelling': 0,        # the group that is spelled out for an 's' step
	    # Playback order: 0 = Vocab / 1 = Chinese Def / 2 = English Def / 's' = spelling
	    'sequence': [0, 0, 's', 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, 's', 1, 2, 0, 0],
	    # Speech rate for each step of 'sequence' (looked up by the step's position)
	    'speed': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	}

	'''
	if you want to check how many kinds of voices your system has:

	---
	speaker = win32com.client.Dispatch('SAPI.SpVoice')
	voices = speaker.GetVoices()
	print('there are ', voices.Count, ' kinds of voices in your system')
	for i in range(voices.Count):
	    print(i, ': ' + voices.Item(i).GetDescription())
	---

	BTW, my system has 2 kinds of voices
	0 : Microsoft Hanhan Desktop - Chinese (Taiwan)
	1 : Microsoft Zira Desktop - English (United States)
	'''

	# Name of the WAV file passed to create_wav() at the bottom of the script.
	output_filename = 'english_vocabulary_001.wav'

	# Vocabulary content to be spoken.
	# Entry format: [Eng Vocab] \t [Chi Def] \n [remaining lines: Eng Def]
	# Entries are separated from each other by a blank line (config['delimiter']).
	text = u'''insolent\t粗魯無禮的、傲慢的
	adj. extremely rude, esp in expressing contempt

	conspicuous\t顯著的、顯而易見的
	adj. easily seen; noticeable; remarkable

	distortion\t扭曲、曲解
	n. distorting or being distorted
	'''

	# split vocab & def
	def get_wordsets(text, config=config):
	    """Split *text* into vocabulary entries and parse each entry.

	    The text is cut on ``config['delimiter']``; each piece is matched from
	    its start against ``config['pattern']`` and capture groups
	    ``1 .. config['group']`` are collected as one word set.  Pieces that do
	    not match are reported on stdout and skipped.

	    Side effect: the compiled pattern is stored in ``config['pat_comp']``.

	    Args:
	        text: the raw vocabulary text.
	        config: settings dict; reads 'pattern', 'delimiter' and 'group'.

	    Returns:
	        A tuple ``(word_sets, config)``; ``word_sets`` holds one list of
	        captured strings per matched entry.

	    Raises:
	        IndexError: if ``config['group']`` exceeds the number of capture
	            groups in the pattern (a configuration error, no longer hidden
	            behind a "no match" message).
	    """
	    pattern = re.compile(config['pattern'])
	    config['pat_comp'] = pattern  # kept on the config, as callers may read it

	    words = text.split(config['delimiter'])  # one chunk per vocabulary entry
	    print('Total: ', len(words), 'words')

	    group_numbers = range(1, config['group'] + 1)  # group 0 is the whole match
	    word_sets = []
	    for w in words:
	        m = pattern.match(w)
	        if m is None:
	            # Explicit check instead of a bare ``except``: only a failed match
	            # is treated as "no match"; interrupts and real bugs propagate.
	            print('no match: ' + w)
	            continue
	        word_sets.append([m.group(n) for n in group_numbers])
	    return word_sets, config

	# text-to-speech
	def create_wav(filename, word_sets, config=config):
	    """Speak every word set into a WAV file through the Windows SAPI engine.

	    For each word set, the steps of ``config['sequence']`` are spoken in
	    order.  A numeric step speaks that group of the word set; the step
	    ``'s'`` speaks the letters of group ``config['spelling']`` one by one.
	    The voice comes from ``config['voice']`` (indexed by group) and the rate
	    from ``config['speed']`` (indexed by the step's position).

	    Args:
	        filename: path of the WAV file to write; an existing file is
	            removed first.
	        word_sets: list of word sets as returned by get_wordsets().
	        config: settings dict; reads 'volume', 'spelling', 'sequence',
	            'speed' and 'voice'.
	    """
	    stream = win32com.client.Dispatch('SAPI.SpFileStream')  # output file stream
	    speaker = win32com.client.Dispatch('SAPI.SpVoice')      # text-to-speech handle

	    if os.path.isfile(filename):  # remove a previous output file
	        print('file existed: ', filename)
	        os.remove(filename)
	    stream.Open(filename, 3)  # create & write file / 3 = SSFMCreateForWrite

	    # From here on the stream is open: make sure it is closed even if a
	    # speech call or a config lookup raises.
	    try:
	        speaker.AudioOutputStream = stream  # route speech into the file
	        speaker.Volume = config['volume']
	        voices = speaker.GetVoices()

	        def speak_once(text, volume, rate, voice):
	            """Speak *text* with the given volume, rate and voice index."""
	            speaker.Voice = voices.Item(voice)
	            speaker.Volume = volume
	            speaker.Rate = rate
	            speaker.Speak(text)
	            speaker.WaitUntilDone(-1)  # -1 = wait without a timeout limit

	        # print out voices info
	        print('Voices: ', voices.Count)
	        for i in range(voices.Count):
	            print(i, ': ' + voices.Item(i).GetDescription())

	        spelling_group = config['spelling']
	        volume = config['volume']
	        for w in word_sets:  # for each word
	            # letters of the spelled group, separated by ', '
	            spell = ', '.join(w[spelling_group])

	            for idx, s in enumerate(config['sequence']):
	                is_spelling = s == 's'
	                # spelling is spoken with the voice of the spelled group
	                group = spelling_group if is_spelling else s
	                # trailing ', ' marks a halt after each item (per the original note)
	                speak_word = (spell if is_spelling else w[s]) + ', '
	                speak_once(speak_word, volume,
	                           config['speed'][idx], config['voice'][group])

	        speaker.WaitUntilDone(-1)
	    finally:
	        stream.Close()  # always release the output file

	# Parse the sample text, then speak every parsed entry into the WAV file.
	# The second item returned by get_wordsets (the config) is not needed here.
	word_sets, _ = get_wordsets(text)
	create_wav(output_filename, word_sets, config)
	# To convert the resulting WAV file to mp3, ffmpeg can be used
	# with a command such as the following:
	# ffmpeg -i "input.wav" -vn -ar 44100 -ac 2 -ab 192k -af "volume=1.5" -f mp3 "output.mp3"