# rasbt/video-subtitles-via-whisper.py

## video-subtitles-via-whisper.py
# Sebastian Raschka 09/24/2022
# Create a new conda environment and install the required packages
#   conda create -n whisper python=3.9
#   conda activate whisper
#   conda install mlxtend -c conda-forge

# Install ffmpeg
# macOS & homebrew
#   brew install ffmpeg
# Ubuntu
#   sudo apt-get install ffmpeg

# Install whisper
# from repo https://github.com/openai/whisper
#   pip install git+https://github.com/openai/whisper.git

import os
import os.path as osp
import subprocess
import sys

from mlxtend.file_io import find_files
from mlxtend.utils import Counter


# Output locations, relative to the directory the script is started from.
audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"


def audio_path_for(video_path, outdir=audio_outdir):
    """Return the ``.aac`` path inside *outdir* that belongs to *video_path*.

    Only the video's base name (without extension) is kept.
    NOTE: because videos are collected recursively, two videos with the same
    file name in different directories map to the same audio file.
    """
    base, _ext = osp.splitext(video_path)
    return osp.join(outdir, osp.basename(base)) + ".aac"


def build_ffmpeg_command(video_path, audio_path):
    """Return the ffmpeg argument list that extracts the audio track.

    ``-vn`` drops the video stream and ``-acodec copy`` copies the audio
    stream as-is instead of re-encoding it.
    """
    return ["ffmpeg", "-i", video_path, "-vn", "-acodec", "copy", audio_path]


def build_whisper_command(audio_path, outdir=subtitle_outdir):
    """Return the whisper CLI argument list that transcribes *audio_path*."""
    return [
        "whisper",
        audio_path,
        "--model",
        "medium",
        "--language",
        "English",
        "--output_dir",
        outdir,
        "--verbose",
        "False",
    ]


def run_command(command):
    """Run *command* (a list of arguments) and return its exit status.

    The command is executed without a shell, so paths containing spaces,
    quotes or shell metacharacters are passed through unchanged. If the
    executable cannot be started, the error is reported on stderr and 127
    is returned instead of raising, so one failure does not abort the batch.
    """
    try:
        return subprocess.run(command, check=False).returncode
    except OSError as err:
        print(f"Could not start {command[0]!r}: {err}", file=sys.stderr)
        return 127


def process_video(video_path):
    """Extract the audio of *video_path* and generate subtitles for it.

    Returns True when whisper finished successfully, False otherwise.
    """
    aac_file_out = audio_path_for(video_path)

    # extract audio file from video
    status = run_command(build_ffmpeg_command(video_path, aac_file_out))

    # ffmpeg also exits non-zero when it declines to overwrite an existing
    # file, so only give up when there is no audio file to transcribe.
    if status != 0 and not osp.exists(aac_file_out):
        print(
            f"Skipping {video_path}: audio extraction failed (exit {status})",
            file=sys.stderr,
        )
        return False

    return run_command(build_whisper_command(aac_file_out)) == 0


def main():
    """Generate subtitles for every ``.mp4`` found below the current directory."""
    all_videos = find_files(substring=".mp4", path="./", recursive=True)
    if all_videos:
        print("Example path:", all_videos[0])
    print("Number of videos to process:", len(all_videos))

    for this_dir in (audio_outdir, subtitle_outdir):
        # exist_ok avoids the check-then-create race of exists() + mkdir()
        os.makedirs(this_dir, exist_ok=True)

    cnt = Counter()
    for v in all_videos:
        process_video(v)
        # advance the progress counter whether or not this video succeeded
        cnt.update()


if __name__ == "__main__":
    main()