rgs1/audio2text.py

## audio2text.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import os
import sys
import time

import requests


def get_params():
    """Parse the command line of the script.

    Two positional string arguments are required, in this order:
    ``audio_file`` and ``gs_prefix``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``, exposing
        ``audio_file`` and ``gs_prefix`` as attributes.
    """
    cli = argparse.ArgumentParser()
    # Both positionals share the same declaration, so register them in order.
    for positional in ('audio_file', 'gs_prefix'):
        cli.add_argument(positional, type=str)

    return cli.parse_args()


# Endpoint that audio2text POSTs the long-running recognition request to.
RECOGNIZE_REQ_URL = 'https://speech.googleapis.com/v1/speech:longrunningrecognize'
# Prefix of the polling URL; the operation name returned by the request is
# appended to it.
RECOGNIZE_REP_URL = 'https://speech.googleapis.com/v1/operations/'
# Base URL that gs_upload extends with the prefix and file name for the PUT.
STORAGE_URL = 'https://storage.googleapis.com/'


def gs_upload(file_path, gs_prefix, session, headers):
    """Upload a local file to Google Cloud Storage and return its gs:// URI.

    The file is PUT to ``STORAGE_URL/<gs_prefix>/<basename of file_path>``.

    Args:
        file_path: path of the local file to upload.
        gs_prefix: bucket name, optionally followed by a folder path
            (e.g. ``my-bucket/audio``); leading/trailing slashes are ignored.
        session: ``requests.Session`` (or compatible object) used for the PUT.
        headers: request headers carrying the authorization; the mapping is
            copied, never modified.

    Returns:
        The ``gs://<gs_prefix>/<filename>`` URI of the uploaded object.

    Exits the process with status 1 when the server does not answer 200.
    """
    filename = os.path.basename(file_path)

    # Join with explicit '/' separators: os.path.join uses the platform
    # separator and silently drops 'gs://' when the prefix starts with '/'.
    object_path = '/'.join(
        part for part in (gs_prefix.strip('/'), filename) if part
    )
    gs_path = 'gs://' + object_path
    url = STORAGE_URL.rstrip('/') + '/' + object_path

    # The caller's headers announce a JSON body; the upload body is raw bytes.
    upload_headers = dict(headers)
    upload_headers['Content-Type'] = 'application/octet-stream'

    # Binary mode + data= sends the file bytes unchanged. files= would wrap
    # them in a multipart form, which would be stored as the object content.
    with open(file_path, 'rb') as f:
        resp = session.put(url, data=f, headers=upload_headers)

    if resp.status_code != 200:
        print(resp.content)
        print('Error when uploading file to GS: %d' % resp.status_code)
        sys.exit(1)

    return gs_path


def audio2text(audio_file, gs_prefix, headers):
    """Transcribe an audio file with the Cloud Speech API and print the text.

    The file is uploaded through ``gs_upload``, a long-running recognition
    request is POSTed for the uploaded object, and the returned operation is
    polled every 5 seconds until it reports ``done``. The transcripts of all
    results are concatenated and printed; an operation that finishes without
    any result prints an empty line.

    Args:
        audio_file: path of the local audio file (sent as OGG_OPUS, 16 kHz,
            language es-ES).
        gs_prefix: bucket (and optional folder) the file is uploaded under.
        headers: request headers used for every HTTP call.

    Exits the process with status 2 when the recognition request is rejected
    and with status 3 when the finished operation reports an error (the
    upload step may exit on its own failures).
    """
    session = requests.Session()

    # upload the audio file
    gs_path = gs_upload(audio_file, gs_prefix, session, headers)

    # submit the request
    data = {
        'config': {
            'encoding': 'OGG_OPUS',  # default format for whatsapp audio
            'sample_rate_hertz': 16000,
            'language_code': 'es-ES',
        },
        'audio': {
            'uri': gs_path
        }
    }
    resp = session.post(RECOGNIZE_REQ_URL, json=data, headers=headers)
    if resp.status_code != 200:
        print('Bad status code when submitting audio: %d' % resp.status_code)
        sys.exit(2)

    # poll until the result is ready
    operation_id = resp.json()['name']
    result_url = '%s%s' % (RECOGNIZE_REP_URL, operation_id)
    while True:
        resp = session.get(result_url, headers=headers)
        if resp.status_code != 200:
            print('Bad status code when fetching results: %d' % resp.status_code)
            time.sleep(5)
            continue

        operation = resp.json()
        if not operation.get('done', False):
            print('Result is not ready yet.. sleeping')
            time.sleep(5)
            continue

        # A finished operation is final: it carries either 'error' or
        # 'response', so polling again can never change the outcome.
        error = operation.get('error')
        if error:
            print('Recognition failed: %s' % (error,))
            sys.exit(3)

        # No results on a finished operation means nothing was recognized.
        results = operation.get('response', {}).get('results') or []

        text_fragments = []
        for res in results:
            alternatives = res.get('alternatives') or []
            if not alternatives:
                continue
            # The API lists alternatives most probable first.
            text_fragments.append(alternatives[0].get('transcript', ''))

        print(''.join(text_fragments))
        break


if __name__ == '__main__':
    # The bearer token is read from the environment rather than the command
    # line.
    auth_token = os.environ.get('AUTH_TOKEN')
    # An unset and an empty variable are equally unusable: both would produce
    # an Authorization header without a token.
    if not auth_token:
        print("AUTH_TOKEN not found in the environment")
        sys.exit(1)

    # Headers shared by every request issued by audio2text.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer %s' % auth_token,
    }
    params = get_params()
    audio2text(params.audio_file, params.gs_prefix, headers)
	#!/usr/bin/env python
	# -- coding: utf-8 --

	import argparse
	import os
	import sys
	import time

	import requests


	def get_params():
	    """Parse the command line of the script.

	    Two positional string arguments are required, in this order:
	    ``audio_file`` and ``gs_prefix``.

	    Returns:
	        The namespace produced by ``ArgumentParser.parse_args()``, exposing
	        ``audio_file`` and ``gs_prefix`` as attributes.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        'audio_file',
	        type=str
	    )
	    parser.add_argument(
	        'gs_prefix',
	        type=str
	    )

	    return parser.parse_args()


	# Endpoint that audio2text POSTs the long-running recognition request to.
	RECOGNIZE_REQ_URL = 'https://speech.googleapis.com/v1/speech:longrunningrecognize'
	# Prefix of the polling URL; the operation name returned by the request is
	# appended to it.
	RECOGNIZE_REP_URL = 'https://speech.googleapis.com/v1/operations/'
	# Base URL that gs_upload extends with the prefix and file name for the PUT.
	STORAGE_URL = 'https://storage.googleapis.com/'


	def gs_upload(file_path, gs_prefix, session, headers):
	    """Upload a local file to Google Cloud Storage and return its gs:// URI.

	    The file is PUT to ``STORAGE_URL/<gs_prefix>/<basename of file_path>``.

	    Args:
	        file_path: path of the local file to upload.
	        gs_prefix: bucket name, optionally followed by a folder path
	            (e.g. ``my-bucket/audio``); leading/trailing slashes are ignored.
	        session: ``requests.Session`` (or compatible object) used for the PUT.
	        headers: request headers carrying the authorization; the mapping is
	            copied, never modified.

	    Returns:
	        The ``gs://<gs_prefix>/<filename>`` URI of the uploaded object.

	    Exits the process with status 1 when the server does not answer 200.
	    """
	    filename = os.path.basename(file_path)

	    # Join with explicit '/' separators: os.path.join uses the platform
	    # separator and silently drops 'gs://' when the prefix starts with '/'.
	    object_path = '/'.join(
	        part for part in (gs_prefix.strip('/'), filename) if part
	    )
	    gs_path = 'gs://' + object_path
	    url = STORAGE_URL.rstrip('/') + '/' + object_path

	    # The caller's headers announce a JSON body; the upload body is raw bytes.
	    upload_headers = dict(headers)
	    upload_headers['Content-Type'] = 'application/octet-stream'

	    # Binary mode + data= sends the file bytes unchanged. files= would wrap
	    # them in a multipart form, which would be stored as the object content.
	    with open(file_path, 'rb') as f:
	        resp = session.put(url, data=f, headers=upload_headers)

	    if resp.status_code != 200:
	        print(resp.content)
	        print('Error when uploading file to GS: %d' % resp.status_code)
	        sys.exit(1)

	    return gs_path


	def audio2text(audio_file, gs_prefix, headers):
	    """Transcribe an audio file with the Cloud Speech API and print the text.

	    The file is uploaded through ``gs_upload``, a long-running recognition
	    request is POSTed for the uploaded object, and the returned operation is
	    polled every 5 seconds until it reports ``done``. The transcripts of all
	    results are concatenated and printed; an operation that finishes without
	    any result prints an empty line.

	    Args:
	        audio_file: path of the local audio file (sent as OGG_OPUS, 16 kHz,
	            language es-ES).
	        gs_prefix: bucket (and optional folder) the file is uploaded under.
	        headers: request headers used for every HTTP call.

	    Exits the process with status 2 when the recognition request is rejected
	    and with status 3 when the finished operation reports an error (the
	    upload step may exit on its own failures).
	    """
	    session = requests.Session()

	    # upload the audio file
	    gs_path = gs_upload(audio_file, gs_prefix, session, headers)

	    # submit the request
	    data = {
	        'config': {
	            'encoding': 'OGG_OPUS',  # default format for whatsapp audio
	            'sample_rate_hertz': 16000,
	            'language_code': 'es-ES',
	        },
	        'audio': {
	            'uri': gs_path
	        }
	    }
	    resp = session.post(RECOGNIZE_REQ_URL, json=data, headers=headers)
	    if resp.status_code != 200:
	        print('Bad status code when submitting audio: %d' % resp.status_code)
	        sys.exit(2)

	    # poll until the result is ready
	    operation_id = resp.json()['name']
	    result_url = '%s%s' % (RECOGNIZE_REP_URL, operation_id)
	    while True:
	        resp = session.get(result_url, headers=headers)
	        if resp.status_code != 200:
	            print('Bad status code when fetching results: %d' % resp.status_code)
	            time.sleep(5)
	            continue

	        operation = resp.json()
	        if not operation.get('done', False):
	            print('Result is not ready yet.. sleeping')
	            time.sleep(5)
	            continue

	        # A finished operation is final: it carries either 'error' or
	        # 'response', so polling again can never change the outcome.
	        error = operation.get('error')
	        if error:
	            print('Recognition failed: %s' % (error,))
	            sys.exit(3)

	        # No results on a finished operation means nothing was recognized.
	        results = operation.get('response', {}).get('results') or []

	        text_fragments = []
	        for res in results:
	            alternatives = res.get('alternatives') or []
	            if not alternatives:
	                continue
	            # The API lists alternatives most probable first.
	            text_fragments.append(alternatives[0].get('transcript', ''))

	        print(''.join(text_fragments))
	        break


	if __name__ == '__main__':
	    # The bearer token is read from the environment rather than the command
	    # line.
	    auth_token = os.environ.get('AUTH_TOKEN')
	    # An unset and an empty variable are equally unusable: both would produce
	    # an Authorization header without a token.
	    if not auth_token:
	        print("AUTH_TOKEN not found in the environment")
	        sys.exit(1)

	    # Headers shared by every request issued by audio2text.
	    headers = {
	        'Content-Type': 'application/json',
	        'Authorization': 'Bearer %s' % auth_token,
	    }
	    params = get_params()
	    audio2text(params.audio_file, params.gs_prefix, headers)