Skip to content

Instantly share code, notes, and snippets.

@deivguerrero
Created March 27, 2019 02:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deivguerrero/2811a841809312737ab7c8bbac5e5590 to your computer and use it in GitHub Desktop.
Save deivguerrero/2811a841809312737ab7c8bbac5e5590 to your computer and use it in GitHub Desktop.
Script que obtiene la transcripción de un audio alojado en Cloud Storage y busca coincidencias con expresiones regulares
import re
import audioread
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# Transcribe a FLAC file stored in Cloud Storage with Google Cloud Speech and
# count how often a target phrase (and looser variants of it) appear in the
# transcript.

# Fallback audio properties; both are overwritten below with the values read
# from the local copy of the audio file.
AUDIO_CHANNELS = 2
AUDIO_RATE = 16000
BLOB_PATH = "audio.flac"
BUCKET_NAME = "audio-devlife"
LANG_CODE = 'es-MX'
ORIGINAL_PHRASE = r'(?P<phrase>(vi|bi){1}da de programador)'
POSSIBLE_PHRASE = r'(?P<phrase>(vi|bi){1}.{0,3}(de)? programador)'
DEV_PHRASE = r'programador'

# Flags must be combined with bitwise OR. The previous
# `re.MULTILINE & re.IGNORECASE` evaluated to 0 (the two flags share no bits),
# so every search silently ran case-sensitively with no flags at all.
REGEX_FLAGS = re.MULTILINE | re.IGNORECASE

# Compile once, outside the result loops.
original_phrase_re = re.compile(ORIGINAL_PHRASE, REGEX_FLAGS)
possible_phrase_re = re.compile(POSSIBLE_PHRASE, REGEX_FLAGS)
dev_phrase_re = re.compile(DEV_PHRASE, REGEX_FLAGS)

# Probe the local file (same relative path as the blob name) for its real
# channel count and sample rate.
with audioread.audio_open(BLOB_PATH) as f:
    AUDIO_CHANNELS = int(f.channels)
    # NOTE(review): AUDIO_RATE is read here but never sent in the
    # RecognitionConfig below -- confirm whether that is intentional.
    AUDIO_RATE = int(f.samplerate)

gcs_uri = "gs://{}/{}".format(BUCKET_NAME, BLOB_PATH)
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
    language_code=LANG_CODE,
    audio_channel_count=AUDIO_CHANNELS,
    enable_separate_recognition_per_channel=True)

# Start the long-running recognition and block until it finishes.
operation = client.long_running_recognize(config, audio)
response = operation.result()

# Running totals across every alternative of every result.
contador_phx = 0  # matches of ORIGINAL_PHRASE
contador_php = 0  # matches of POSSIBLE_PHRASE
contador_dev = 0  # matches of DEV_PHRASE

for result in response.results:
    for alternative in result.alternatives:
        print('=' * 20)
        text_block = alternative.transcript
        print(text_block)
        # findall returns one entry per non-overlapping match, so its length
        # is the match count (an empty list simply adds 0).
        contador_phx += len(original_phrase_re.findall(text_block))
        contador_php += len(possible_phrase_re.findall(text_block))
        contador_dev += len(dev_phrase_re.findall(text_block))

print("\nFRASE ORIGINAL: {}\tFRASE POSIBLE: {}\t PALABRA PROGRAMADOR:{}".
      format(contador_phx, contador_php, contador_dev))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment