This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def taxa_de_quadros(nome_arquivo_audio):
    """Return the frame rate and channel count of a WAV file.

    Args:
        nome_arquivo_audio: Path of the WAV file to inspect.

    Returns:
        A ``(frame_rate, channels)`` tuple read from the file through the
        ``wave`` module.
    """
    # The context manager closes the file even if reading the header fails.
    with wave.open(nome_arquivo_audio, "rb") as wave_file:
        return wave_file.getframerate(), wave_file.getnchannels()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def stereo_para_mono(nome_arquivo_audio):
    """Convert a WAV file to a single channel, overwriting it in place.

    Args:
        nome_arquivo_audio: Path of the WAV file. The converted audio is
            exported back to this same path, replacing the original file.
    """
    sound = AudioSegment.from_wav(nome_arquivo_audio)
    sound = sound.set_channels(1)
    # Export to the input path so later steps pick up the mono version.
    sound.export(nome_arquivo_audio, format="wav")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import wave

from google.cloud import speech
from google.cloud import storage
from pydub import AudioSegment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def upload_blob(nome_bucket, nome_arquivo_fonte, nome_blob_destino):
    """Upload a local file to a Google Cloud Storage bucket.

    This function assumes that authentication has already been set up.

    Args:
        nome_bucket: Name of the destination bucket.
        nome_arquivo_fonte: Path of the local file to upload.
        nome_blob_destino: Name the blob will have inside the bucket.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(nome_bucket)
    blob = bucket.blob(nome_blob_destino)
    blob.upload_from_filename(nome_arquivo_fonte)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def delete_blob(nome_bucket, nome_blob):
    """Delete a blob from a Google Cloud Storage bucket.

    It is good practice to delete blobs once they are no longer needed
    for our work.

    Args:
        nome_bucket: Name of the bucket that holds the blob.
        nome_blob: Name of the blob to delete.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(nome_bucket)
    blob = bucket.blob(nome_blob)
    blob.delete()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def google_transcribe(filepath, nome_arquivo_audio, nome_bucket): | |
'''Aqui estamos pegando o arquivo de áudio em determinada localização, note que o áudio | |
está em um árquivo local''' | |
nome_arquivo= filepath + nome_arquivo_audio | |
'''Agora vamos usar a função que já fizemos para determinar a taxa de quadros (frame_rate) e | |
quantidade de canais(channels) que o nosso áudio possui. | |
Vale o lembrete que as boas práticas ditam que a taxa de quadros deve ser maior que 16000HZ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def write_transcripts(output_filepath, transcript_filename, transcript):
    """Write a transcript to a text file, replacing any existing content.

    Args:
        output_filepath: Directory prefix of the output file. It is joined to
            the file name by plain string concatenation, so it must already
            end with a path separator.
        transcript_filename: Name of the file to create.
        transcript: Text to write; it is preceded by a single newline.
    """
    destination = output_filepath + transcript_filename
    # ``with`` guarantees the handle is closed even if the write fails, and
    # an explicit UTF-8 encoding keeps accented characters independent of
    # the platform's default locale encoding.
    with open(destination, "w", encoding="utf-8") as transcript_file:
        transcript_file.write("\n" + transcript)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk a directory of audio files and transcribe every file found in it.
# NOTE(review): audio_path, bucketname and test_file are defined outside this
# snippet; test_file appears to be the output directory prefix — confirm.
for audio_file_name in os.listdir(audio_path):
    transcript = google_transcribe(audio_path, audio_file_name, bucketname)
    # splitext strips only the final extension, so "a.b.wav" maps to
    # "a.b.txt" instead of being truncated to "a.txt".
    transcript_filename = os.path.splitext(audio_file_name)[0] + '.txt'
    write_transcripts(test_file, transcript_filename, transcript)