Skip to content

Instantly share code, notes, and snippets.

@ChadDevOps
Created November 3, 2023 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChadDevOps/7dc4a42a6c54f79697d33ecc31bc60df to your computer and use it in GitHub Desktop.
Save ChadDevOps/7dc4a42a6c54f79697d33ecc31bc60df to your computer and use it in GitHub Desktop.
Convert audio files to text transcripts using faster-whisper, a fast reimplementation of OpenAI's Whisper model.
# https://github.com/guillaumekln/faster-whisper
# pip install faster-whisper
# pip install nvidia-cublas-cu11 nvidia-cudnn-cu11
# pip install torch
# Export your library prior to running in WSL or virtual env
# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
import os
import torch, gc
import shutil
# get path
# python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'
# NOTE(review): setting LD_LIBRARY_PATH from inside an already-running process
# may not affect the dynamic loader (glibc reads it at process startup) — the
# header comment's `export` before launching is the reliable path. Confirm
# whether this assignment is actually needed.
os.environ['LD_LIBRARY_PATH'] = "/usr/local/lib/python3.8/dist-packages/nvidia/cublas/lib:/usr/local/lib/python3.8/dist-packages/nvidia/cudnn/lib"
from faster_whisper import WhisperModel
# Whisper checkpoint to load; "large-v2" is the highest-accuracy (and
# slowest / most VRAM-hungry) released size.
model_size = "large-v2"
#List of valid file extensions
# NOTE(review): str.endswith is case-sensitive, so as written only the exact
# spellings ".WAV" and ".mp3" match; files named ".wav" or ".MP3" are skipped.
valid_extensions = (".WAV", ".mp3")
# Run on GPU with FP32
model = WhisperModel(model_size, device="cuda", compute_type="float32")
# or run on GPU with INT8, float16, float32
# model = WhisperModel(model_size, device="cuda", compute_type="int8")
# or run on CPU with INT8
# model = WhisperModel(model_size, device="cpu", compute_type="int8")
# Replace 'your_directory' with the actual path to your audio files directory
audio_directory = '/mnt/d/Recordings'
# Processed audio and generated transcripts both end up here.
destination_directory = '/mnt/d/Recordings/converted'
# --- Transcription loop --------------------------------------------------
# For each matching audio file in audio_directory: transcribe it with
# faster-whisper, write a timestamped transcript into destination_directory,
# then move the processed audio file there too so reruns skip it.

def _format_timestamp(seconds):
    """Return *seconds* (a float offset) as an HH:MM:SS string.

    Fractional seconds are truncated, matching ``%02d`` formatting of a float.
    """
    total = int(seconds)
    return "%02d:%02d:%02d" % (total // 3600, (total % 3600) // 60, total % 60)

# Ensure the output directory exists before writing/moving anything into it.
os.makedirs(destination_directory, exist_ok=True)

# Compare extensions case-insensitively so ".wav" / ".MP3" spelling variants
# are not silently skipped (str.endswith alone is case-sensitive).
_extensions = tuple(ext.lower() for ext in valid_extensions)

for audio_file in os.listdir(audio_directory):
    if not audio_file.lower().endswith(_extensions):
        continue
    source_file = os.path.join(audio_directory, audio_file)
    output_file = os.path.join(destination_directory, audio_file + ".txt")
    # transcribe() returns a lazy generator of segments plus file-level info.
    segments, info = model.transcribe(source_file, beam_size=5, vad_filter=True)
    print(f"Transcribing {audio_file}...")
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    # Write one "[start -> end] text" line per segment; fix the encoding so
    # non-ASCII transcript text is not platform-dependent.
    with open(output_file, 'w', encoding='utf-8') as file:
        for segment in segments:
            output = "[%s -> %s] %s" % (
                _format_timestamp(segment.start),
                _format_timestamp(segment.end),
                segment.text,
            )
            print(output)
            file.write(output + '\n')
    print(f"Output saved to {output_file}")
    # Move the processed audio out of the input directory.
    shutil.move(source_file, destination_directory)
    # Drop references to the (potentially large) results before the next file.
    del segments
    del info
    torch.cuda.empty_cache()

# Release the model to free GPU memory once all files are processed.
del model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment