# Source: GitHub gist anuragmishra1/7b9015bf6974d06579727dfd73b425e9
# by @anuragmishra1 (last active January 17, 2018).
import requests
import http.client
import urllib.parse
import uuid
import json
import io
import os
import sys
import time
from datetime import timedelta
import argparse
# The API credential is read from a locally stored environment variable so the
# subscription key never has to be written into the code. The value is None
# when KEY_SPEECH is not set.
Ocp_Apim_Subscription_Key = os.environ.get("KEY_SPEECH")
# Redirect everything written to stdout into a text file. The encoding is
# pinned to UTF-8 so that transcripts containing non-ASCII characters can be
# written regardless of the platform's default encoding.
# NOTE: anything else that prints to sys.stdout (e.g. argparse's --help text)
# ends up in this file as well.
sys.stdout = open('speech_api_test_text_stt_output.txt', 'w', encoding='utf-8')
# Reference point on the monotonic clock for the execution-time report.
start_time = time.monotonic()
def input_file(speech_file_path):
    """Load an audio file into the module-level ``content`` buffer.

    Args:
        speech_file_path: Path of the speech file to read (opened in binary
            mode).

    Returns:
        The bytes read from the file, or ``None`` when the path does not
        point to an existing file. The same value is stored in the global
        ``content`` so that later code can pick it up.
    """
    global content
    if not os.path.isfile(speech_file_path):
        # Reset the buffer so that neither an undefined name nor stale audio
        # from an earlier call is picked up after a failed load.
        content = None
        print("File doesn't exist in the directory!")
        return None
    with open(speech_file_path, 'rb') as audio_file:
        content = audio_file.read()
    return content
def speech_stt(timeout=60):
    """Send the loaded audio to the speech API and print the transcript.

    The function first exchanges the subscription key for an access token,
    then posts the bytes held in the module-level ``content`` buffer to the
    recognize endpoint and prints the text and confidence of the first
    result.

    Args:
        timeout: Seconds to wait on each of the two HTTP calls. Pass ``None``
            to wait indefinitely (the behaviour before this parameter
            existed).

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        no audio or key is available, or when either request does not yield
        a usable answer.
    """
    # ``content`` only exists once input_file() has run, so look it up
    # defensively instead of risking a NameError after the token request.
    audio = globals().get('content')
    if not audio:
        print("No audio content loaded; call input_file() with an existing file first.")
        return None
    # An unset key would otherwise surface as an opaque TypeError from
    # http.client while it validates the header value.
    if not Ocp_Apim_Subscription_Key:
        print("KEY_SPEECH environment variable is not set; cannot request an access token.")
        return None

    token_headers = {
        # Must be a valid subscription key.
        'Ocp-Apim-Subscription-Key': Ocp_Apim_Subscription_Key,
    }
    access_token_host = "api.cognitive.microsoft.com"
    token_path = "/sts/v1.0/issueToken"

    # Connect to server to get the Access Token
    print("Connect to server to get the Access Token")
    conn = http.client.HTTPSConnection(access_token_host, timeout=timeout)
    try:
        # The token endpoint is called with an empty request body.
        conn.request("POST", token_path, "", token_headers)
        token_response = conn.getresponse()
        print(token_response.status, token_response.reason)
        token_body = token_response.read()
    finally:
        # Release the socket even when the request raises.
        conn.close()
    if token_response.status != 200:
        # The body is an error message here, not a token; do not send it on.
        print("Access token request failed; aborting.")
        return None
    accesstoken = token_body.decode("UTF-8")

    endpoint = 'https://speech.platform.bing.com/recognize'
    # Params from Microsoft Example
    params = {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
        'locale': 'en-US',
        'version': '3.0',
        'format': 'json',
        'instanceid': '565D69FF-E928-4B7E-87DA-9A750B96D9E3',
        'requestid': str(uuid.uuid4()),
        'device.os': 'windows',
    }
    # Same runtime value the original adjacent string literals concatenated
    # to, written out as a single literal.
    content_type = "audio/wav; codec = audio/pcm; samplerate = 16000"
    headers = {
        'Authorization': 'Bearer ' + accesstoken,
        'Content-Type': content_type,
    }
    response = requests.post(endpoint,
                             params=params,
                             data=audio,
                             headers=headers,
                             timeout=timeout)
    if not response.ok:
        print("Recognition request failed:", response.status_code, response.reason)
        return None
    try:
        val = json.loads(response.text)
    except ValueError:
        print("Recognition response was not valid JSON.")
        return None
    results = val.get("results") if isinstance(val, dict) else None
    if not results:
        print("Recognition response contained no results.")
        return None

    best = results[0]
    print('Text: ', best["name"])
    print('Confidence: ', best["confidence"])
    return best
if __name__ == '__main__':
    # Command-line entry point: read the audio file named on the command
    # line, transcribe it, then report how long the whole run took.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'speech_file_path',
        help='The complete file path of the speech file you want to convert from speech to text.')
    args = parser.parse_args()

    input_file(args.speech_file_path)
    # input_file() reports a missing file itself; only call the API when
    # audio bytes were actually stored in the module-level ``content``.
    if globals().get('content') is not None:
        speech_stt()

    end_time = time.monotonic()
    print("Execution_Time:", timedelta(seconds=end_time - start_time))
    print('\n')
# GitHub page footer (not part of the script): Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment