# anuragmishra1/microsoft_speech_api_stt.py

## microsoft_speech_api_stt.py
import requests
import http.client
import urllib.parse
import uuid
import json
import io
import os
import sys
import time
from datetime import timedelta
import argparse


# The Speech API subscription key is read from the KEY_SPEECH environment
# variable instead of being written into this script.
Ocp_Apim_Subscription_Key = os.getenv("KEY_SPEECH")


# From here on, everything printed to the console goes into this text file
# (opened in write mode, so earlier contents are overwritten).
sys.stdout = open('speech_api_test_text_stt_output.txt', mode='w')

# Monotonic reference point for the execution-time report.
start_time = time.monotonic()


def input_file(speech_file_path):
    """Read the audio file at *speech_file_path* into the global ``content``.

    Args:
        speech_file_path: Path of the speech/audio file to load.

    Returns:
        The raw bytes of the file, or ``None`` when the path is not an
        existing regular file (a message is printed in that case).
    """
    # ``content`` remains a module global for backward compatibility; the same
    # bytes are also returned so callers do not have to rely on the global.
    global content
    if not os.path.isfile(speech_file_path):
        # Reset the global so a failed load never leaves it undefined or
        # still holding the bytes of a previously loaded file.
        content = None
        print("File doesn't exist in the directory!")
        return None

    with io.open(speech_file_path, 'rb') as audio_file:
        content = audio_file.read()
    return content


def speech_stt():
    """Send the loaded audio for recognition and print the top result.

    Reads the audio bytes from the module-level ``content``, exchanges the
    subscription key for an access token, posts the audio to the recognition
    endpoint at speech.platform.bing.com and prints the first result's text
    and confidence.

    Returns:
        None. If no audio has been loaded, or the token request does not
        succeed, a message is printed and no recognition request is sent.

    Raises:
        requests.HTTPError: If the recognition endpoint answers with an
            error status.
    """
    # Look the audio up defensively: when loading failed, ``content`` may be
    # undefined (or None), which would otherwise surface as a NameError or an
    # empty upload.
    audio = globals().get('content')
    if audio is None:
        print("No audio content loaded; call input_file() first.")
        return

    token_host = "api.cognitive.microsoft.com"
    token_path = "/sts/v1.0/issueToken"
    token_headers = {
        # NOTE: The subscription key must be a valid one for the request to succeed.
        'Ocp-Apim-Subscription-Key': Ocp_Apim_Subscription_Key,
    }

    # Connect to server to get the Access Token
    print("Connect to server to get the Access Token")
    conn = http.client.HTTPSConnection(token_host)
    try:
        conn.request("POST", token_path, "", token_headers)
        token_response = conn.getresponse()
        print(token_response.status, token_response.reason)
        token_body = token_response.read()
    finally:
        # Close the connection even when the request itself raises.
        conn.close()

    # On failure the body is an error payload, not a token; do not send it
    # on as a bearer credential.
    if not 200 <= token_response.status < 300:
        print("Could not obtain an access token; aborting recognition.")
        return
    accesstoken = token_body.decode("UTF-8")

    endpoint = 'https://speech.platform.bing.com/recognize'
    # Params from Microsoft Example
    params = {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
        'locale': 'en-US',
        'version': '3.0',
        'format': 'json',
        'instanceid': '565D69FF-E928-4B7E-87DA-9A750B96D9E3',
        'requestid': uuid.uuid4(),
        'device.os': 'windows',
    }
    # Same runtime value as before, written as a single literal: the earlier
    # doubled quotes were implicit string concatenation and never produced
    # quote characters in the header.
    content_type = "audio/wav; codec = audio/pcm; samplerate = 16000"

    headers = {
        'Authorization': 'Bearer ' + accesstoken,
        'Content-Type': content_type,
    }
    response = requests.post(endpoint,
                             params=params,
                             data=audio,
                             headers=headers)
    # Fail with a clear HTTP error instead of a KeyError on an error payload.
    response.raise_for_status()
    val = json.loads(response.text)
    print('Text: ', val["results"][0]["name"])
    print('Confidence: ', val["results"][0]["confidence"])


if __name__ == '__main__':
    # Command-line entry point: one positional argument, the audio file path.
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    arg_parser.add_argument(
        'speech_file_path',
        help=('The complete file path of the speech file '
              'you want to convert from speech to text.'),
    )
    cli_args = arg_parser.parse_args()

    # Load the audio first, then run recognition on it.
    input_file(cli_args.speech_file_path)
    speech_stt()


# Report the total run time. These statements sit outside the guard above,
# so they run on import as well as when executed as a script.
end_time = time.monotonic()
print("Execution_Time:", timedelta(seconds=end_time - start_time))
print('\n')
	import requests
	import http.client
	import urllib.parse
	import uuid
	import json
	import io
	import os
	import sys
	import time
	from datetime import timedelta
	import argparse


	# The Speech API subscription key is read from the KEY_SPEECH environment
	# variable instead of being written into this script.
	Ocp_Apim_Subscription_Key = os.getenv("KEY_SPEECH")


	# From here on, everything printed to the console goes into this text file
	# (opened in write mode, so earlier contents are overwritten).
	sys.stdout = open('speech_api_test_text_stt_output.txt', mode='w')

	# Monotonic reference point for the execution-time report.
	start_time = time.monotonic()


	def input_file(speech_file_path):
	    """Read the audio file at *speech_file_path* into the global ``content``.

	    Args:
	        speech_file_path: Path of the speech/audio file to load.

	    Returns:
	        The raw bytes of the file, or ``None`` when the path is not an
	        existing regular file (a message is printed in that case).
	    """
	    # ``content`` remains a module global for backward compatibility; the
	    # same bytes are also returned so callers need not rely on the global.
	    global content
	    if not os.path.isfile(speech_file_path):
	        # Reset the global so a failed load never leaves it undefined or
	        # still holding the bytes of a previously loaded file.
	        content = None
	        print("File doesn't exist in the directory!")
	        return None

	    with io.open(speech_file_path, 'rb') as audio_file:
	        content = audio_file.read()
	    return content


	def speech_stt():
	    """Send the loaded audio for recognition and print the top result.

	    Reads the audio bytes from the module-level ``content``, exchanges the
	    subscription key for an access token, posts the audio to the
	    recognition endpoint at speech.platform.bing.com and prints the first
	    result's text and confidence.

	    Returns:
	        None. If no audio has been loaded, or the token request does not
	        succeed, a message is printed and no recognition request is sent.

	    Raises:
	        requests.HTTPError: If the recognition endpoint answers with an
	            error status.
	    """
	    # Look the audio up defensively: when loading failed, ``content`` may
	    # be undefined (or None), which would otherwise surface as a NameError
	    # or an empty upload.
	    audio = globals().get('content')
	    if audio is None:
	        print("No audio content loaded; call input_file() first.")
	        return

	    token_host = "api.cognitive.microsoft.com"
	    token_path = "/sts/v1.0/issueToken"
	    token_headers = {
	        # NOTE: The subscription key must be a valid one for the request to succeed.
	        'Ocp-Apim-Subscription-Key': Ocp_Apim_Subscription_Key,
	    }

	    # Connect to server to get the Access Token
	    print("Connect to server to get the Access Token")
	    conn = http.client.HTTPSConnection(token_host)
	    try:
	        conn.request("POST", token_path, "", token_headers)
	        token_response = conn.getresponse()
	        print(token_response.status, token_response.reason)
	        token_body = token_response.read()
	    finally:
	        # Close the connection even when the request itself raises.
	        conn.close()

	    # On failure the body is an error payload, not a token; do not send
	    # it on as a bearer credential.
	    if not 200 <= token_response.status < 300:
	        print("Could not obtain an access token; aborting recognition.")
	        return
	    accesstoken = token_body.decode("UTF-8")

	    endpoint = 'https://speech.platform.bing.com/recognize'
	    # Params from Microsoft Example
	    params = {
	        'scenarios': 'ulm',
	        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
	        'locale': 'en-US',
	        'version': '3.0',
	        'format': 'json',
	        'instanceid': '565D69FF-E928-4B7E-87DA-9A750B96D9E3',
	        'requestid': uuid.uuid4(),
	        'device.os': 'windows',
	    }
	    # Same runtime value as before, written as a single literal: the
	    # earlier doubled quotes were implicit string concatenation and never
	    # produced quote characters in the header.
	    content_type = "audio/wav; codec = audio/pcm; samplerate = 16000"

	    headers = {
	        'Authorization': 'Bearer ' + accesstoken,
	        'Content-Type': content_type,
	    }
	    response = requests.post(endpoint,
	                             params=params,
	                             data=audio,
	                             headers=headers)
	    # Fail with a clear HTTP error instead of a KeyError on an error payload.
	    response.raise_for_status()
	    val = json.loads(response.text)
	    print('Text: ', val["results"][0]["name"])
	    print('Confidence: ', val["results"][0]["confidence"])


	if __name__ == '__main__':
	    # Command-line entry point: one positional argument, the audio file path.
	    parser = argparse.ArgumentParser(
	        description=__doc__,
	        formatter_class=argparse.RawDescriptionHelpFormatter)
	    parser.add_argument(
	        'speech_file_path',
	        help='The complete file path of the speech file you want to convert from speech to text.')
	    args = parser.parse_args()

	    # Load the audio first, then run recognition on it.
	    input_file(args.speech_file_path)
	    speech_stt()


	# Report the total run time. These statements sit outside the guard above,
	# so they run on import as well as when executed as a script.
	end_time = time.monotonic()
	print("Execution_Time:", timedelta(seconds=end_time - start_time))
	print('\n')