Skip to content

Instantly share code, notes, and snippets.

@jeakwon
Created June 24, 2019 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeakwon/11f7348b4a82c1f62037573f8b6f057a to your computer and use it in GitHub Desktop.
Save jeakwon/11f7348b4a82c1f62037573f8b6f057a to your computer and use it in GitHub Desktop.
google_cloud_speech_recognition.py
import os
# Set the credentials path at import time, before any SpeechClient is built.
# "<credential_file>.json" is a placeholder: substitute the real key file name.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS="<credential_file>.json")
def transcribe_gcs_with_word_time_offsets(gcs_uri, timeout=90):
    """Transcribe an audio file asynchronously and print word time offsets.

    Submits a long-running recognition request for the audio at ``gcs_uri``
    (configured as FLAC, 16000 Hz, ``en-US``, with word time offsets
    enabled), blocks until the operation result is available, then prints
    the first alternative of every result: its transcript, its confidence,
    and the start/end time in seconds of each word.

    Args:
        gcs_uri: URI of the audio file to transcribe, e.g.
            ``"gs://bucket/object.flac"``.
        timeout: Seconds to wait for the operation result. Defaults to 90,
            the value that used to be hard-coded.

    Returns:
        None. All output is written to stdout.

    NOTE(review): this uses the ``enums`` / ``types`` modules of
    ``google.cloud.speech`` and positional ``long_running_recognize``
    arguments; newer google-cloud-speech releases are believed to have
    changed this API -- confirm against the installed version.
    """
    # Imported lazily so that merely importing this module does not require
    # the google-cloud-speech package.
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    client = speech.SpeechClient()
    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    # Keep the whole response under its own name; the loop variable below
    # previously re-bound (shadowed) it.
    response = operation.result(timeout=timeout)

    for result in response.results:
        # A result without alternatives has nothing to print; skip it
        # rather than fail on alternatives[0].
        if not result.alternatives:
            continue
        alternative = result.alternatives[0]
        print(u'Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))

        for word_info in alternative.words:
            word = word_info.word
            start_time = word_info.start_time
            end_time = word_info.end_time
            # Times arrive as separate seconds/nanos fields; fold them into
            # fractional seconds for display.
            print('Word: {}, start_time: {}, end_time: {}'.format(
                word,
                start_time.seconds + start_time.nanos * 1e-9,
                end_time.seconds + end_time.nanos * 1e-9))
if __name__ == "__main__":
    # When executed as a script, transcribe the bundled sample recording.
    sample_uri = "gs://cloud-samples-tests/speech/vr.flac"
    transcribe_gcs_with_word_time_offsets(sample_uri)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment