Skip to content

Instantly share code, notes, and snippets.

@mikezucc
Created February 28, 2017 07:32
Show Gist options
  • Save mikezucc/aeeaca4c8dd1d7f53d617407b30b2e64 to your computer and use it in GitHub Desktop.
Save mikezucc/aeeaca4c8dd1d7f53d617407b30b2e64 to your computer and use it in GitHub Desktop.
Process every LINEAR16 44.1 kHz audio file in the current directory through the Google Speech API and write a single JSON object whose keys are the filenames.
#!/usr/bin/env python
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Speech API sample application using the REST API for batch
processing.
Example usage (run from the directory containing the .raw files): python transcribe.py
"""
# [START import_libraries]
import argparse
import base64
import json
import os
import googleapiclient.discovery
# [END import_libraries]
# [START authenticating]
# Application default credentials provided by env variable
# GOOGLE_APPLICATION_CREDENTIALS
def get_speech_service():
    """Build and return a discovery-based client for the Speech API.

    The client targets service ``speech`` at version ``v1beta1``.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.
    """
    api_name = 'speech'
    api_version = 'v1beta1'
    return googleapiclient.discovery.build(api_name, api_version)
# [END authenticating]
def get_current_directory_files():
    """List the files that live directly in the current working directory.

    Only entries for which ``os.path.isfile`` is true are kept, so
    sub-directories are skipped. Each kept name is echoed to stdout.

    Returns:
        A list of bare file names (no directory component), in the order
        ``os.listdir`` produced them.
    """
    files = [f for f in os.listdir('.') if os.path.isfile(f)]
    for f in files:
        # Call form of print: valid on both Python 2 and Python 3, and
        # consistent with the other print calls in this file.
        print("Discovered " + f)
    return files
def main():
    """Transcribe every ``.raw`` file in the current directory.

    Each file whose name ends in ``.raw`` is passed to
    ``google_speech_api``; every other file is reported and skipped. The
    responses are collected in a dict keyed by file name and written, as
    indented JSON, to a file named ``TRANSCRIPTION`` in the current
    directory.
    """
    raw_suffix = ".raw"
    responses_by_file = {}
    file_list = get_current_directory_files()
    # Count only the files that will actually be processed so the
    # "N / total" progress line can reach total.
    total = sum(1 for name in file_list if name.endswith(raw_suffix))
    position = 0
    for filename in file_list:
        # Suffix test rather than substring test, so names such as
        # "clip.raw.bak" are not uploaded as audio.
        if not filename.endswith(raw_suffix):
            print("******************\n\t\tWILL NOT PROCESS " + filename + "\n******************")
            continue
        position += 1
        print(" + + + + + + + + + + " + str(position) + " / " + str(total) + " + + + + + + + + + + ")
        print(" + + + + + + + + + + " + filename + " + + + + + + + + + + ")
        responses_by_file[filename] = google_speech_api(filename)
    transcribed_json = json.dumps(responses_by_file, indent=2)
    # The with-statement closes the file; no explicit close() is needed.
    with open("TRANSCRIPTION", 'w') as transcription_file:
        transcription_file.write(transcribed_json)
def google_speech_api(filename):
    """Send one audio file to the Speech API and return the response.

    The file is read as bytes, base64-encoded and submitted through a
    ``syncrecognize`` request. The raw response is printed as indented
    JSON, followed by each alternative transcript.

    Args:
        filename: path of the audio file to upload.

    Returns:
        The object returned by executing the request (read here with
        ``.get('results', [])``, so presumably a dict).
    """
    # [START construct_request]
    with open(filename, 'rb') as audio_file:
        raw_audio = audio_file.read()
    # The JSON request body carries the binary audio as base64 text.
    encoded_audio = base64.b64encode(raw_audio)

    service = get_speech_service()
    # There are a bunch of config options you can specify. See
    # https://goo.gl/KPZn97 for the full list.
    recognition_config = {
        'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
        'sampleRate': 44100,  # 44.1 kHz
        # See http://g.co/cloud/speech/docs/languages for a list of
        # supported languages.
        'languageCode': 'en-US',  # a BCP-47 language tag
    }
    request_body = {
        'config': recognition_config,
        'audio': {
            'content': encoded_audio.decode('UTF-8'),
        },
    }
    service_request = service.speech().syncrecognize(body=request_body)
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    # First show the raw JSON response.
    print(json.dumps(response, indent=2))
    # Then show just the transcripts.
    for result in response.get('results', []):
        print('Result:')
        for alternative in result['alternatives']:
            print(u' Alternative: {}'.format(alternative['transcript']))
    # [END send_request]
    return response
# [START run_application]
if __name__ == "__main__":
    # No command-line arguments are parsed; main() takes none and gets
    # its input list from get_current_directory_files().
    main()
# [END run_application]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment