# -*- coding: utf-8 -*-
#
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async")
#
# To install the latest published package dependency, execute the following:
#   pip install google-cloud-speech

# sample-metadata
#   title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
#   description: Transcribe a long audio file using asynchronous speech recognition
#   usage: python3 speech_transcribe_async.py [--file_path "resources/brooklyn_bridge.raw"]
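#
# Note (not part of the generated sample): this file uses the pre-2.0
# google-cloud-speech surface (it imports `enums` and passes config/audio
# positionally), so it is assumed to require a pre-2.0 release, e.g.
#   pip install "google-cloud-speech<2.0"
# The 2.x releases removed the `enums` module and switched to keyword/request
# arguments.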
# [START speech_transcribe_async]
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums
import io
import datetime
from urllib.parse import urlparse
def sample_long_running_recognize(file_path):
    """
    Transcribe a long audio file using asynchronous speech recognition

    Args:
      file_path Path to a local audio file or a Cloud Storage URI,
        e.g. /path/audio.flac or gs://bucket/audio.flac
    """

    client = speech_v1.SpeechClient()

    # file_path = 'resources/brooklyn_bridge.raw'

    # The language of the supplied audio
    language_code = "zh-Hant-TW"

    # Sample rate in Hertz of the audio data sent
    sample_rate_hertz = 16000

    # Encoding of audio data sent. This sample sets this explicitly.
    # This field is optional for FLAC and WAV audio formats.
    encoding = enums.RecognitionConfig.AudioEncoding.FLAC
    config = {
        "language_code": language_code,
        "sample_rate_hertz": sample_rate_hertz,
        "encoding": encoding,
    }

    if file_path.startswith('gs://'):
        # Audio stored in Cloud Storage is referenced by URI; the
        # bucket-relative object path is reused below to name the
        # transcript file.
        audio = {"uri": file_path}
        new_file_path = urlparse(file_path).path[1:]
    else:
        # A local file is read and sent inline as the request content.
        new_file_path = file_path
        with io.open(file_path, "rb") as f:
            content = f.read()
        audio = {"content": content}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # Append every alternative transcript to a timestamped text file
    # named after the input path.
    today = datetime.datetime.today().strftime("%Y%m%d-%H%M%S")
    with io.open("{0}.{1}.txt".format(new_file_path, today), "a") as f:
        for result in response.results:
            for alternative in result.alternatives:
                f.write(u"{}\n".format(alternative.transcript))
                # print(u"Transcript: {}\n".format(alternative.transcript))


# [END speech_transcribe_async]
def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--file_path", type=str, default="resources/brooklyn_bridge.raw"
    )
    args = parser.parse_args()

    sample_long_running_recognize(args.file_path)


if __name__ == "__main__":
    main()
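
# Example invocations (illustrative only; the file names and bucket below are
# placeholders, and the audio must match the encoding/sample-rate config above):
#   python3 speech_transcribe_async.py --file_path "path/to/audio.flac"
#   python3 speech_transcribe_async.py --file_path "gs://your-bucket/audio.flac"
# Each run appends the recognized transcripts to "<file_path>.<YYYYMMDD-HHMMSS>.txt".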