Skip to content

Instantly share code, notes, and snippets.

@anuragmishra1
Last active January 17, 2018 17:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anuragmishra1/db13c09434974e04948a2caf8c4b0f4e to your computer and use it in GitHub Desktop.
Save anuragmishra1/db13c09434974e04948a2caf8c4b0f4e to your computer and use it in GitHub Desktop.
import http.client
import urllib.parse
import json
from xml.etree import ElementTree
import os
import sys
import wave
import time
from datetime import timedelta
import argparse
# The Speech API subscription key is not hard-coded; it is taken from the
# KEY_SPEECH environment variable (None when that variable is not set).
Ocp_Apim_Subscription_Key = os.getenv("KEY_SPEECH")
# From this point on, everything printed to stdout is appended to this log
# file instead of being shown on the console.
sys.stdout = open('speech_api_test_text_tts_output.txt', mode='a')
# Reference point for the execution-time report printed at the end of a run.
start_time = time.monotonic()
def input_file(text_file_path):
    """Load the text to synthesize into the module-level ``text`` variable.

    Args:
        text_file_path: Path of the text file to read.

    Returns:
        The contents of the file, or an empty string when
        ``text_file_path`` is not an existing regular file (a message is
        printed in that case).  The same value is stored in the global
        ``text``.
    """
    global text
    if not os.path.isfile(text_file_path):
        print("File doesn't exist in the directory!")
        # Always leave ``text`` defined: speech_tts() reads it, and without
        # this assignment a failed load surfaced later as a NameError.
        text = ""
        return text
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()
    return text
def speech_tts():
    """Convert the module-level ``text`` to speech and save it as a WAV file.

    Two HTTPS requests are made: the first exchanges the subscription key
    for an access token, the second posts an SSML document built from
    ``text`` to the synthesis endpoint.  The status line of each response
    and the size of the returned payload are printed.

    The audio is written to ``bing_test_text_tts.wav`` only when the
    synthesis request succeeded (2xx status) and ``text`` has at least
    three characters.  If the token request fails, the function returns
    without contacting the synthesis endpoint.

    Reads the module-level names ``text`` and ``Ocp_Apim_Subscription_Key``.
    Returns None.
    """
    # Connect to server to get the Access Token
    print("Connect to server to get the Access Token")
    status, data = _https_post(
        "api.cognitive.microsoft.com",
        "/sts/v1.0/issueToken",
        "",
        {
            # NOTE: the value must be a valid subscription key.
            'Ocp-Apim-Subscription-Key': Ocp_Apim_Subscription_Key,
        },
    )
    if not 200 <= status < 300:
        # The body is an error report, not a token; forwarding it as a
        # bearer token could only fail, so stop here.
        return
    accesstoken = data.decode("UTF-8")

    headers = {
        "Content-type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": "riff-16khz-16bit-mono-pcm",
        "Authorization": "Bearer " + accesstoken,
        "X-Search-AppId": "07D3234E49CE426DAA29772419F436CA",
        "X-Search-ClientID": "1ECFAE91408841A480F00935DC390960",
        "User-Agent": "TTSForPython",
    }

    # Connect to server to synthesize the wave
    print("\nConnect to server to synthesize the wave")
    status, data = _https_post(
        "speech.platform.bing.com", "/synthesize", _build_ssml(text), headers)
    print("The synthesized wave length: %d" % (len(data)))

    # An error body must not be saved as if it were audio.
    if 200 <= status < 300 and len(text) >= 3:
        _write_wav('bing_test_text_tts.wav', data)


def _https_post(host, path, body, headers):
    """POST ``body`` to ``host``/``path`` over HTTPS; return (status, payload).

    The response status and reason are printed.  The connection is closed
    even when the request or the read raises.
    """
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request("POST", path, body, headers)
        response = conn.getresponse()
        print(response.status, response.reason)
        return response.status, response.read()
    finally:
        conn.close()


def _build_ssml(content):
    """Return the serialized SSML document that asks the voice to speak ``content``."""
    xml_ns = '{http://www.w3.org/XML/1998/namespace}'
    body = ElementTree.Element('speak', version='1.0')
    body.set(xml_ns + 'lang', 'en-us')
    voice = ElementTree.SubElement(body, 'voice')
    voice.set(xml_ns + 'lang', 'en-US')
    voice.set(xml_ns + 'gender', 'Female')
    voice.set('name', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)')
    voice.text = content
    return ElementTree.tostring(body)


def _write_wav(path, audio):
    """Save the synthesized ``audio`` bytes to ``path`` as a WAV file."""
    if audio[:4] == b'RIFF':
        # A "riff-..." output format is requested, so the payload is expected
        # to carry its own RIFF header already.  Passing it to
        # wave.writeframes() would prepend a second header and turn the
        # first one into garbage samples, so write the bytes unchanged.
        with open(path, 'wb') as out:
            out.write(audio)
        return
    # Headerless PCM: let the wave module add the header
    # (16 kHz, mono, 2 bytes per sample).
    with wave.open(path, 'wb') as wf:
        wf.setframerate(16000)
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.writeframes(audio)
if __name__ == '__main__':
    # Command-line entry point: takes the path of the text file to convert,
    # runs the conversion, then reports how long the whole run took.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        'text_file_path',
        help='The complete file path of the text file you want to convert from text to speech.',
    )
    args = parser.parse_args()

    # Load the text first, then synthesize it.
    input_file(args.text_file_path)
    speech_tts()

    # Elapsed time is measured from the module-level start_time.
    end_time = time.monotonic()
    print("Execution_Time:", timedelta(seconds=end_time - start_time))
    print('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment