Created
June 24, 2019 01:37
-
-
Save jeakwon/11f7348b4a82c1f62037573f8b6f057a to your computer and use it in GitHub Desktop.
google_cloud_speech_recognition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Set the environment variable holding the path to the service-account
# credential file. Replace the placeholder with the path to a real JSON key
# file before running.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "<credential_file>.json"
def transcribe_gcs_with_word_time_offsets(gcs_uri, timeout=90):
    """Transcribe an audio file asynchronously and print word time offsets.

    Submits a long-running recognition request for the audio at ``gcs_uri``
    (configured as FLAC, 16000 Hz, ``en-US``, with word time offsets
    enabled), waits for the operation to finish, and then prints, for each
    result, the transcript and confidence of its first alternative followed
    by every word with its start and end time in seconds.

    Args:
        gcs_uri: URI of the audio file to transcribe, e.g.
            ``gs://bucket/audio.flac``.
        timeout: Number of seconds passed to ``operation.result`` as its
            ``timeout``. Defaults to 90, the value that used to be
            hard-coded.

    Returns:
        None. All output is written with ``print``.
    """
    # Imported inside the function, so merely importing this module does not
    # require the google-cloud-speech package.
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    # Keyword arguments make the call independent of parameter order.
    operation = client.long_running_recognize(config=config, audio=audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=timeout)

    # A distinct loop name keeps `response` intact; the original rebound
    # `result` while iterating over `result.results`.
    for result in response.results:
        alternative = result.alternatives[0]
        print(u'Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))

        for word_info in alternative.words:
            start_time = word_info.start_time
            end_time = word_info.end_time
            # Each offset is split into whole seconds and nanoseconds;
            # combine them into fractional seconds.
            start_seconds = start_time.seconds + start_time.nanos * 1e-9
            end_seconds = end_time.seconds + end_time.nanos * 1e-9
            print('Word: {}, start_time: {}, end_time: {}'.format(
                word_info.word,
                start_seconds,
                end_seconds))
if __name__ == "__main__":
    # When run as a script, transcribe the sample recording at this URI.
    sample_uri = "gs://cloud-samples-tests/speech/vr.flac"
    transcribe_gcs_with_word_time_offsets(sample_uri)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment