Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script that creates subtitles (closed captions) for all MP4 video files in your current directory
# Sebastian Raschka 09/24/2022
# Create a new conda environment and install the required packages
# conda create -n whisper python=3.9
# conda activate whisper
# conda install mlxtend -c conda-forge
# Install ffmpeg
# macOS & homebrew
# brew install ffmpeg
# Ubuntu
# sudo apt-get install ffmpeg
# Install whisper
# from repo https://github.com/openai/whisper
# pip install git+https://github.com/openai/whisper.git
import os
import os.path as osp
import subprocess

from mlxtend.file_io import find_files
from mlxtend.utils import Counter
# Ask mlxtend's find_files for every file matching the ".mp4" substring,
# searching the current directory recursively.
all_videos = find_files(substring=".mp4", path="./", recursive=True)

# Only show an example when there is one: indexing an empty result would
# raise IndexError and abort the script before the (more useful) count below.
if all_videos:
    print("Example path:", all_videos[0])
print("Number of videos to process:", len(all_videos))

audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"

for this_dir in (audio_outdir, subtitle_outdir):
    # exist_ok=True replaces the racy "check exists, then mkdir" sequence.
    os.makedirs(this_dir, exist_ok=True)

cnt = Counter()

for v in all_videos:
    base = osp.splitext(v)[0]
    aac_file_out = osp.join(audio_outdir, osp.basename(base)) + ".aac"

    # Extract the audio file from the video.
    # The command is passed as an argument list (no shell), so paths that
    # contain spaces, quotes or shell metacharacters reach ffmpeg intact.
    # NOTE: subprocess.run raises FileNotFoundError if the executable is not
    # on PATH, instead of silently returning a non-zero status.
    extraction = subprocess.run(
        ["ffmpeg", "-i", v, "-vn", "-acodec", "copy", aac_file_out]
    )

    if extraction.returncode == 0:
        # Generate the subtitles from the extracted audio track.
        subprocess.run(
            [
                "whisper",
                aac_file_out,
                "--model",
                "medium",
                "--language",
                "English",
                "--output_dir",
                subtitle_outdir,
                "--verbose",
                "False",
            ]
        )
    else:
        # Without an audio file there is nothing for whisper to transcribe.
        print(
            f"Audio extraction failed (exit status {extraction.returncode}), "
            f"skipping: {v}"
        )

    # Advance the counter once per video, whether or not it succeeded.
    cnt.update()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment