Last active
April 22, 2025 04:08
-
-
Save fiddyschmitt/80892610ab58dd36ce7c619a4c3379ae to your computer and use it in GitHub Desktop.
Generate subtitles/captions for MP4 file using OpenAI Whisper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import whisper | |
from pathlib import Path | |
from concurrent.futures import ProcessPoolExecutor | |
# Requirements:
#   pip install -U openai-whisper
#   winget install ffmpeg
#   Copy ffmpeg.exe from this folder (presumably to somewhere on PATH or next to this script -- confirm):
#   C:\Users\foo\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-6.0-full_build\bin\ffmpeg.exe
#
# May also need:
#   pip install torch
#   pip install more-itertools numba numpy tiktoken tqdm
#
# To update Whisper:
#   cd C:\Python39\Scripts
#   ./pip install --upgrade --no-deps --force-reinstall git+https://github.com/openai/whisper.git
def transcribe_file(input_file, model):
    """Transcribe one media file and write .srt and .txt output beside it.

    The file is skipped when a .txt file with the same stem already exists.
    Any exception raised while processing is caught and reported on stdout,
    so a caller looping over many files can continue with the next one.

    Args:
        input_file: Path of the audio/video file to transcribe.
        model: Object exposing ``transcribe(path)``; ``main`` passes the
            value returned by ``whisper.load_model``.

    Returns:
        None.
    """
    output_folder, input_filename = os.path.split(input_file)

    try:
        print(f"Processing: {input_filename}")

        source = Path(input_file)
        txt_output_file = source.with_suffix('.txt')
        srt_output_file = source.with_suffix('.srt')

        # Only the .txt file is checked; it is also the last file written below.
        if os.path.isfile(txt_output_file):
            print(f"Output already exists: {txt_output_file.name}")
            return

        result = model.transcribe(input_file)

        # Emit the subtitles first, then the plain-text transcript.
        for extension, target in (("srt", srt_output_file), ("txt", txt_output_file)):
            writer = whisper.utils.get_writer(extension, output_folder)
            writer(result, target)
    except Exception as error:
        # Best-effort: report the failure and let the caller carry on.
        print(f"[{input_filename}] An error occurred: {type(error).__name__} - {error!s}")
def main(input_folder_str=r"H:\Videos", model_name="base"):
    """Transcribe every supported media file found under a folder.

    The folder is searched recursively for files whose extension (compared
    case-insensitively) is .mp4, .avi, .mkv or .wav, and each one is passed
    to ``transcribe_file`` in sorted path order.

    Args:
        input_folder_str: Root folder to search.
        model_name: Name passed to ``whisper.load_model``. The "large" model
            takes a long time to process, hence the "base" default.

    Returns:
        None.
    """
    input_folder = Path(input_folder_str)
    input_extensions = {'.mp4', '.avi', '.mkv', '.wav'}

    # Walk the tree once and filter by lower-cased suffix, so files such as
    # "CLIP.MP4" are also picked up on case-sensitive filesystems.
    input_files = sorted(
        path
        for path in input_folder.rglob('*')
        if path.is_file() and path.suffix.lower() in input_extensions
    )

    # Skip loading the model when there is nothing to transcribe.
    if not input_files:
        print(f"No input files found in: {input_folder}")
        return

    model = whisper.load_model(model_name)

    # Files are processed one at a time: running transcriptions in parallel
    # seemed to crash (a GitHub discussion suggested the module is not
    # thread safe), so no executor is created here.
    for input_file in input_files:
        transcribe_file(str(input_file), model)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment