Created
June 26, 2017 06:00
-
-
Save rgs1/0261f98677d3c4161ee7401d771a4537 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import argparse | |
import os | |
import sys | |
import time | |
import requests | |
def get_params():
    """Parse the command-line arguments.

    Two positional string arguments are required, in this order:
    ``audio_file`` and ``gs_prefix``.

    Returns:
        The ``argparse.Namespace`` produced by the parser, exposing the
        ``audio_file`` and ``gs_prefix`` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Upload an audio file to Google Cloud Storage and '
                    'print its transcription.',
    )
    # argparse already treats arguments as strings, so no explicit type is
    # needed; the help text is what makes ``--help`` useful.
    parser.add_argument(
        'audio_file',
        help='path of the local audio file to upload and transcribe',
    )
    parser.add_argument(
        'gs_prefix',
        help='storage destination the file name is appended to, '
             'e.g. BUCKET or BUCKET/some/dir',
    )
    return parser.parse_args()
# Base URL under which uploaded objects are addressed (prefix and file name
# are appended by the upload code).
STORAGE_URL = (
    'https://storage.googleapis.com/'
)

# Endpoint that receives the recognition request.
RECOGNIZE_REQ_URL = (
    'https://speech.googleapis.com/v1/speech:longrunningrecognize'
)

# Endpoint polled for the outcome; the operation name returned by the
# request above is appended to it.
RECOGNIZE_REP_URL = (
    'https://speech.googleapis.com/v1/operations/'
)
def gs_upload(file_path, gs_prefix, session, headers):
    """Upload a local file to Google Cloud Storage and return its gs:// URI.

    Args:
        file_path: path of the local file to upload.
        gs_prefix: destination prefix (bucket, optionally followed by a
            path); the file's base name is appended to it.
        session: object with a requests-style ``put`` method, used to send
            the upload.
        headers: HTTP headers sent unchanged with the upload request.

    Returns:
        The ``gs://<prefix>/<filename>`` URI of the uploaded object.

    Exits the process with status 1 when the response status is not 200.
    """
    filename = os.path.basename(file_path)

    # Object paths always use '/'. They are assembled by hand because
    # os.path.join uses the platform separator and discards every earlier
    # component when a later one starts with '/'.
    object_path = '/'.join(
        part for part in (gs_prefix.strip('/'), filename) if part
    )
    gs_path = 'gs://' + object_path
    url = STORAGE_URL.rstrip('/') + '/' + object_path

    # Binary mode so the audio bytes are read untouched. The open file is
    # passed as ``data`` so it becomes the raw request body; ``files=`` would
    # wrap it in a multipart/form-data envelope instead.
    # NOTE(review): the caller's headers are forwarded as-is, including any
    # Content-Type they carry - confirm whether an audio type should be set
    # for the upload.
    with open(file_path, 'rb') as f:
        resp = session.put(url, data=f, headers=headers)

    if resp.status_code != 200:
        print(resp.content)
        print('Error when uploading file to GS: %d' % resp.status_code)
        sys.exit(1)

    return gs_path
def audio2text(audio_file, gs_prefix, headers, language_code='es-ES',
               poll_interval=5):
    """Upload an audio file, request its transcription and print the text.

    The file is uploaded with ``gs_upload``, a recognition request is posted
    to ``RECOGNIZE_REQ_URL``, and the resulting operation is polled under
    ``RECOGNIZE_REP_URL`` until it reports ``done``. The transcripts of all
    results are concatenated and printed.

    Args:
        audio_file: path of the local audio file.
        gs_prefix: storage prefix handed to ``gs_upload``.
        headers: HTTP headers sent with every request.
        language_code: value sent as the request's ``language_code``.
        poll_interval: seconds to sleep between polling attempts.

    Exits the process with status 2 when the request is not accepted and
    with status 3 when the finished operation reports an error.
    """
    # The context manager closes the session's connections on every exit
    # path, including sys.exit().
    with requests.Session() as session:
        # upload the audio file
        gs_path = gs_upload(audio_file, gs_prefix, session, headers)

        # submit the request
        data = {
            'config': {
                'encoding': 'OGG_OPUS',  # default format for whatsapp audio
                'sample_rate_hertz': 16000,
                'language_code': language_code,
            },
            'audio': {
                'uri': gs_path,
            },
        }
        resp = session.post(RECOGNIZE_REQ_URL, json=data, headers=headers)
        if resp.status_code != 200:
            print('Bad status code when submitting audio: %d'
                  % resp.status_code)
            sys.exit(2)

        # poll until the result is ready
        operation_id = resp.json()['name']
        result_url = '%s%s' % (RECOGNIZE_REP_URL, operation_id)
        while True:
            resp = session.get(result_url, headers=headers)
            if resp.status_code != 200:
                # Best effort: keep retrying on a bad status.
                print('Bad status code when fetching results: %d'
                      % resp.status_code)
                time.sleep(poll_interval)
                continue

            operation = resp.json()
            if not operation.get('done', False):
                print('Result is not ready yet.. sleeping')
                time.sleep(poll_interval)
                continue

            # From here on the operation is final, so polling again cannot
            # change the outcome. A failed operation carries 'error' and no
            # 'response'; report it instead of failing on the missing key.
            error = operation.get('error')
            if error:
                print('Recognition failed: %s' % (error,))
                sys.exit(3)

            results = operation.get('response', {}).get('results')
            if not results:
                # Finished without any results: nothing was transcribed.
                print('No transcription results were returned')
                break

            # TODO pick up the alternative with the highest confidence
            # NOTE(review): the API reference describes the first
            # alternative as the most probable one - confirm before changing.
            text_fragments = [
                res['alternatives'][0]['transcript']
                for res in results
                if res.get('alternatives')
            ]
            print(''.join(text_fragments))
            break
if __name__ == '__main__':
    # Parse the command line first so that ``--help`` and usage errors are
    # reported even when no token is configured.
    params = get_params()

    # The bearer token is read from the environment; an unset or empty value
    # cannot authenticate, so stop before any request is made.
    auth_token = os.environ.get('AUTH_TOKEN')
    if not auth_token:
        print("AUTH_TOKEN not found in the environment")
        sys.exit(1)

    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer %s' % auth_token,
    }

    audio2text(params.audio_file, params.gs_prefix, headers)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment