Skip to content

Instantly share code, notes, and snippets.

@rgs1
Created June 26, 2017 06:00
Show Gist options
  • Save rgs1/0261f98677d3c4161ee7401d771a4537 to your computer and use it in GitHub Desktop.
Save rgs1/0261f98677d3c4161ee7401d771a4537 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import os
import sys
import time
import requests
def get_params():
    """Parse the command line and return the resulting namespace.

    Two positional string arguments are required, in this order:
    ``audio_file`` and ``gs_prefix``.
    """
    cli = argparse.ArgumentParser()
    for positional in ('audio_file', 'gs_prefix'):
        cli.add_argument(positional, type=str)
    return cli.parse_args()
# Storage endpoint; the destination prefix and file name are appended to it.
STORAGE_URL = 'https://storage.googleapis.com/'
# Speech endpoint the long-running recognition request is POSTed to.
RECOGNIZE_REQ_URL = 'https://speech.googleapis.com/v1/speech:longrunningrecognize'
# Base URL used for polling; the operation name is appended to it.
RECOGNIZE_REP_URL = 'https://speech.googleapis.com/v1/operations/'
def gs_upload(file_path, gs_prefix, session, headers):
    """Upload a local file to Cloud Storage and return its ``gs://`` URI.

    The object is named after the base name of ``file_path`` and placed
    under ``gs_prefix``.

    Args:
        file_path: path of the local file to upload.
        gs_prefix: destination prefix (presumably ``<bucket>[/<dir>]`` --
            verify against how the script is invoked).
        session: ``requests.Session``-like object used to issue the PUT.
        headers: HTTP headers sent with the request.

    Returns:
        The ``gs://<gs_prefix>/<filename>`` URI of the uploaded object.

    Exits the process with status 1 when the server does not answer 200.
    """
    filename = os.path.basename(file_path)
    # Join with a literal '/': os.path.join would use the platform separator
    # and silently drop earlier parts when gs_prefix starts with '/'.
    object_path = '/'.join(
        part for part in (gs_prefix.strip('/'), filename) if part
    )
    gs_path = 'gs://' + object_path
    url = STORAGE_URL.rstrip('/') + '/' + object_path
    # The audio is binary, so open it in 'rb'. Send it as the raw request
    # body (data=) rather than files=, which would wrap the bytes in a
    # multipart/form-data envelope that gets stored as part of the object.
    with open(file_path, 'rb') as f:
        resp = session.put(url, data=f, headers=headers)
    if resp.status_code != 200:
        print(resp.content)
        print('Error when uploading file to GS: %d' % resp.status_code)
        sys.exit(1)
    return gs_path
def audio2text(audio_file, gs_prefix, headers, language_code='es-ES',
               poll_interval=5):
    """Upload an audio file, request its transcription and print the text.

    The file is uploaded with ``gs_upload``, a long-running recognition
    request is submitted for the resulting URI, and the operation is polled
    until it reports completion. The transcript is printed to stdout.

    Args:
        audio_file: path of the local audio file.
        gs_prefix: destination prefix handed to ``gs_upload``.
        headers: HTTP headers sent with every request.
        language_code: language code placed in the recognition config.
        poll_interval: seconds to sleep between two polls of the operation.

    Exits the process with status 2 when the recognition request is rejected
    and with status 3 when the finished operation reports an error.
    """
    # The context manager closes the session's connections on every exit
    # path, including the sys.exit() calls below.
    with requests.Session() as session:
        # upload the audio file
        gs_path = gs_upload(audio_file, gs_prefix, session, headers)

        # submit the request
        payload = {
            'config': {
                'encoding': 'OGG_OPUS',  # default format for whatsapp audio
                'sample_rate_hertz': 16000,
                'language_code': language_code,
            },
            'audio': {
                'uri': gs_path
            }
        }
        resp = session.post(RECOGNIZE_REQ_URL, json=payload, headers=headers)
        if resp.status_code != 200:
            print('Bad status code when submitting audio: %d' % resp.status_code)
            sys.exit(2)

        # poll until the result is ready
        result_url = '%s%s' % (RECOGNIZE_REP_URL, resp.json()['name'])
        operation = _fetch_finished_operation(
            session, result_url, headers, poll_interval)

    # A finished operation carries either 'error' or 'response'; indexing
    # 'response' unconditionally would raise KeyError on failures.
    if 'error' in operation:
        print('Recognition failed: %s' % (operation['error'],))
        sys.exit(3)

    # Once the operation is done an empty result list is final, so report it
    # instead of polling forever.
    results = operation.get('response', {}).get('results')
    if not results:
        print('Operation finished without any transcription results')
        return

    print(_join_transcripts(results))


def _fetch_finished_operation(session, result_url, headers, poll_interval):
    """Poll ``result_url`` until the operation is done; return its JSON body."""
    while True:
        resp = session.get(result_url, headers=headers)
        if resp.status_code != 200:
            # Best effort: keep retrying on a bad status instead of aborting.
            print('Bad status code when fetching results: %d' % resp.status_code)
        else:
            operation = resp.json()
            if operation.get('done', False):
                return operation
            print('Result is not ready yet.. sleeping')
        time.sleep(poll_interval)


def _join_transcripts(results):
    """Concatenate the first alternative's transcript of every result."""
    fragments = []
    for res in results:
        alternatives = res.get('alternatives') or []
        if not alternatives:
            # Nothing to take from this result; skip it rather than crash.
            continue
        # TODO pick up the alternative with the highest confidence
        fragments.append(alternatives[0]['transcript'])
    return ''.join(fragments)
if __name__ == '__main__':
    # Parse the command line first so that usage errors and --help are
    # reported even when AUTH_TOKEN is not set.
    params = get_params()
    auth_token = os.environ.get('AUTH_TOKEN')
    # An empty token is as unusable as a missing one.
    if not auth_token:
        print("AUTH_TOKEN not found in the environment")
        sys.exit(1)
    # The same headers are sent with every request made by audio2text.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer %s' % auth_token,
    }
    audio2text(params.audio_file, params.gs_prefix, headers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment