Skip to content

Instantly share code, notes, and snippets.

@jeakwon
Created June 24, 2019 02:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeakwon/1bee9ba6bbdb4d20d4384dbfd0931084 to your computer and use it in GitHub Desktop.
Save jeakwon/1bee9ba6bbdb4d20d4384dbfd0931084 to your computer and use it in GitHub Desktop.
speech recognition of Korean wav (mono)
# Cutting an audio file from <start_sec> to <end_sec> (stream copy, no re-encoding)
# ffmpeg -i <in_file> -ss <start_sec> -to <end_sec> -c copy <out_file>
# ffmpeg -i 1.mp3 -ss 0 -to 60 -c copy 2.mp3 (example of use)
# Generating mono .wav file
# ffmpeg -i <in_file> -acodec pcm_s16le -ac 1 -ar 16000 <out_file>
# ffmpeg -i 2.mp3 -acodec pcm_s16le -ac 1 -ar 16000 2.wav
# Combine both
# ffmpeg -i 1.mp3 -ss 0 -to 60 -acodec pcm_s16le -ac 1 -ar 16000 out.wav
import io
import os
# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# Tell the Google Cloud client library where the service-account key lives.
# NOTE: "<credential>.json" is a placeholder; replace it with a real path.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "<credential>.json"

# Instantiates a client
client = speech.SpeechClient()

# The audio file to transcribe is expected next to this script.
file_name = os.path.join(os.path.dirname(__file__), '2.wav')

# Loads the audio into memory
with io.open(file_name, 'rb') as audio_file:
    content = audio_file.read()

audio = types.RecognitionAudio(content=content)

# LINEAR16 at 16 kHz matches the `-acodec pcm_s16le -ar 16000` ffmpeg
# conversion described in the comments at the top of this file.
config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='ko-KR',
    enable_word_time_offsets=True,
)

# Detects speech in the audio file
response = client.recognize(config, audio)

for result in response.results:
    # Only the first alternative of each result is reported.
    alternative = result.alternatives[0]
    print(u'Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}'.format(alternative.confidence))

    # Per-word timing; the loop is nested because it reads this result's
    # `alternative`.
    for word_info in alternative.words:
        word = word_info.word
        start_time = word_info.start_time
        end_time = word_info.end_time
        # Offsets expose `seconds` and `nanos`; fold them into fractional
        # seconds for display.
        print('Word: {}, start_time: {}, end_time: {}'.format(
            word,
            start_time.seconds + start_time.nanos * 1e-9,
            end_time.seconds + end_time.nanos * 1e-9))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment