Created
February 28, 2017 07:32
-
-
Save mikezucc/aeeaca4c8dd1d7f53d617407b30b2e64 to your computer and use it in GitHub Desktop.
Processes every LINEAR16 44.1 kHz audio file in the same directory through the Google Speech API and produces a single JSON object whose keys are the filenames.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Copyright 2016 Google Inc. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Google Cloud Speech API sample application using the REST API for batch | |
processing. | |
Example usage (run from the directory that holds the .raw audio files; no arguments are taken): python transcribe.py
""" | |
# [START import_libraries] | |
import argparse | |
import base64 | |
import json | |
import os | |
import googleapiclient.discovery | |
# [END import_libraries] | |
# [START authenticating] | |
# Application default credentials provided by env variable | |
# GOOGLE_APPLICATION_CREDENTIALS | |
def get_speech_service():
    """Build and return a client for the 'speech' API, version 'v1beta1'.

    No credentials are passed explicitly; application default credentials
    (GOOGLE_APPLICATION_CREDENTIALS environment variable) are relied upon.
    """
    api_name, api_version = 'speech', 'v1beta1'
    return googleapiclient.discovery.build(api_name, api_version)
# [END authenticating] | |
def get_current_directory_files():
    """List the regular files in the current working directory.

    Sub-directories are excluded. Every discovered name is printed.

    Returns:
        A list of bare file names (no directory component), in the order
        reported by os.listdir('.').
    """
    files = [name for name in os.listdir('.') if os.path.isfile(name)]
    for name in files:
        # Call form of print rather than the Python 2 print statement, so
        # the module parses on Python 3 and matches the rest of the file.
        print("Discovered " + name)
    return files
def main():
    """Transcribe every ``.raw`` file in the current directory.

    Each file whose name ends with ``.raw`` is passed to
    google_speech_api(); every other file is reported and skipped. The
    responses are collected in a dict keyed by file name and written as
    indented JSON to a file named ``TRANSCRIPTION`` in the current directory.
    """
    responses = {}
    file_list = get_current_directory_files()
    # Only audio files count towards the progress total; counting every
    # directory entry meant the "n / total" banner could never reach total.
    total = sum(1 for name in file_list if name.endswith(".raw"))
    current = 0
    for filename in file_list:
        # endswith() instead of a substring test, so a name such as
        # "clip.raw.json" is not uploaded as audio.
        if not filename.endswith(".raw"):
            print("******************\n\t\tWILL NOT PROCESS " + filename + "\n******************")
            continue
        current += 1
        print(" + + + + + + + + + + " + str(current) + " / " + str(total) + " + + + + + + + + + + ")
        print(" + + + + + + + + + + " + filename + " + + + + + + + + + + ")
        responses[filename] = google_speech_api(filename)

    # The with-statement closes the file; no explicit close() is needed.
    with open("TRANSCRIPTION", 'w') as transcription_file:
        transcription_file.write(json.dumps(responses, indent=2))
def google_speech_api(filename, sample_rate=44100, language_code='en-US'):
    """Send one raw audio file to the Speech API and return the response.

    The file is read in full, base64-encoded and submitted in a single
    ``syncrecognize`` request. The JSON response and every transcript
    alternative it contains are printed.

    Args:
        filename: path of the audio file; it is opened in binary mode.
        sample_rate: value sent as ``sampleRate`` in the request config
            (Hz). Defaults to 44100.
        language_code: value sent as ``languageCode`` (a BCP-47 language
            tag). Defaults to 'en-US'.

    Returns:
        The response returned by ``service_request.execute()``.
    """
    # [START construct_request]
    with open(filename, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the JSON
        # request.
        speech_content = base64.b64encode(speech.read())
    service = get_speech_service()
    service_request = service.speech().syncrecognize(
        body={
            'config': {
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
                'sampleRate': sample_rate,  # in Hz; default is 44.1 kHz
                # See http://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                'languageCode': language_code,  # a BCP-47 language tag
            },
            'audio': {
                # b64encode returns bytes; the JSON body needs text.
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    # First print the raw json response
    print(json.dumps(response, indent=2))
    # Now print the actual transcriptions
    for result in response.get('results', []):
        print('Result:')
        for alternative in result['alternatives']:
            print(u' Alternative: {}'.format(alternative['transcript']))
    # [END send_request]
    return response
# [START run_application] | |
if __name__ == "__main__":
    # No command-line arguments are parsed: main() takes none and works on
    # the files found in the current directory.
    main()
# [END run_application] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment