Created
November 4, 2023 23:27
-
-
Save swenson/efa268aa5d0d1b0c5e2db32e3e65a771 to your computer and use it in GitHub Desktop.
Quick-and-dirty script that finds media files with missing subtitles and transcribes them with whisper.cpp. Requires ffmpeg (including ffprobe) and whisper.cpp. Run it like: python3 transcribe-missing.py path/to/media/files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Transcribe the audio track of one media file into an English .srt file
# written next to it (same path, extension replaced by ".en.srt").
#
# Usage: ./make-subtitles.sh path/to/media-file
#
# Requires ffmpeg on PATH. Must be run from a directory that contains the
# whisper.cpp `main` binary and `models/ggml-base.en.bin`, because both are
# referenced relative to the current working directory.
set -euxo pipefail

if [ "$#" -lt 1 ]; then
    echo "usage: $0 path/to/media-file" >&2
    exit 1
fi

# Final subtitle path: strip the last extension and append .en.srt
srt_out="${1%.*}.en.srt"

# The scratch files live in the cwd; remove them however the script exits so
# a failed run does not leave stale audio or a partial transcript behind.
trap 'rm -f temp.wav temp.wav.srt' EXIT

echo "Converting audio"
rm -f temp.wav
# Resample to 16 kHz, mono, 16-bit PCM WAV before handing it to whisper.cpp
ffmpeg -i "$1" -ar 16000 -ac 1 -c:a pcm_s16le temp.wav

echo "Transcribing"
# --output-srt writes the transcript to <input>.srt, i.e. temp.wav.srt;
# -t 8 uses eight threads, -ml 42 limits the length of each segment.
./main -m models/ggml-base.en.bin -f ./temp.wav --output-srt -t 8 -ml 42
mv temp.wav.srt "$srt_out"

# optional: rewrite the original file to include subtitles
# (reads the .srt from its final location, since temp.wav.srt was moved above)
#echo "Adding to video file"
#ffmpeg -i "$1" -i "$srt_out" -c copy -metadata:s:s:0 language=eng "${1%.*}.subtitled.mkv"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
import os.path | |
import os | |
import subprocess as sp | |
import sys | |
import time | |
def has_subtitles(fname) -> bool:
    """Return True if *fname* already has subtitles available.

    Two sources are checked, cheapest first:

    1. A sidecar file next to the media file, named after it with the media
       extension replaced by one of the known subtitle suffixes.
    2. A subtitle stream embedded in the container, as reported by ffprobe.

    Args:
        fname: Path to the media file.

    Returns:
        True when a sidecar file or an embedded subtitle stream is found,
        otherwise False.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        FileNotFoundError: the ffprobe executable could not be found.
    """
    base, _ = os.path.splitext(fname)
    sidecar_suffixes = (
        '.srt', '.en.srt', '.english.srt',
        '.stt', '.en.stt', '.english.stt',
        '.sub',
    )
    if any(os.path.exists(base + suffix) for suffix in sidecar_suffixes):
        return True

    # Keep stderr out of the captured output: anything ffprobe printed there
    # would be mixed into the JSON document and break json.loads.
    out = sp.check_output(
        ['ffprobe', '-v', 'quiet', '-print_format', 'json',
         '-show_format', '-show_streams', fname],
        stderr=sp.DEVNULL,
    )
    # Use .get() so a probe result without a stream list, or a stream entry
    # without a codec_type, counts as "no subtitles" instead of a KeyError.
    streams = json.loads(out).get('streams', [])
    return any(stream.get('codec_type') == 'subtitle' for stream in streams)
# File extensions treated as video files. Entries are lower-case; candidate
# extensions are lower-cased before the lookup.
check_endings = {'.mkv', '.mpg', '.avi', '.mp4', '.m4v', '.mov'}


def main() -> None:
    """Transcribe every video under the directory given as the first argument.

    Walks the tree recursively, prints the path of each video file that has
    no subtitles yet and runs ./make-subtitles.sh on it. The helper script is
    looked up relative to the current working directory.

    Raises:
        SystemExit: no directory argument was supplied.
        subprocess.CalledProcessError: the helper script exited non-zero;
            the walk stops at that file.
    """
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} path/to/media/files')

    for dirpath, _dirnames, fnames in os.walk(sys.argv[1]):
        for name in fnames:
            _, ext = os.path.splitext(name)
            # Compare case-insensitively so MOVIE.MKV or clip.MP4 are not
            # silently skipped.
            if ext.lower() not in check_endings:
                continue
            fname = os.path.join(dirpath, name)
            if has_subtitles(fname):
                continue
            print(fname)
            sp.check_call(['./make-subtitles.sh', fname])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment