Skip to content

Instantly share code, notes, and snippets.

@rubyu
Last active June 18, 2023 21:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubyu/9079512bd8856cf9e910da2824709d80 to your computer and use it in GitHub Desktop.
Save rubyu/9079512bd8856cf9e910da2824709d80 to your computer and use it in GitHub Desktop.
"""
This is a script for creating English subtitle files for movie files.
Note that all descendants of the given folder are processed, not just those directly under the folder.
Subtitles are transcribed from English audio tracks using the Whisper model, so the quality is almost the same as CC (Closed Caption).
The srt file generated by this script is automatically loaded by players such as MPC (Media Player Classic) and its derivatives.
This script requires:
- NVIDIA graphics card with sufficient memory (12GB+ recommended)
- CUDA 11
- cuDNN for CUDA 11
- zlib
- ffmpeg and ffprobe
- https://github.com/jianfch/stable-ts/ (!! important !!)
Note: The original openai/whisper implementation has been reported to sometimes produce timestamps that are out of sync with the audio,
but using stable-ts instead reduces this problem to a practically acceptable level.
Regarding NVIDIA CUDA, I used the following binaries to set up my environment:
- cuda_11.8.0_522.06_windows.exe (https://developer.nvidia.com/cuda-toolkit-archive)
- cudnn-windows-x86_64-8.9.2.26_cuda11-archive.zip (https://developer.nvidia.com/cudnn)
- zlib123dllx64.zip (http://www.winimage.com/zLibDll/zlib123dllx64.zip)
"""
import os
import subprocess
import json
import stable_whisper
# Whisper model used by transcribe(); loaded once when the module is imported.
model = stable_whisper.load_model('large-v2')
# Executable names placed first in the ffprobe / ffmpeg command lines built
# below. Replace with full paths if the tools are not reachable by name.
ffprobe = 'ffprobe'
ffmpeg = 'ffmpeg'
def get_streams(filepath):
    """Return the stream descriptions that ffprobe reports for a file.

    Runs ffprobe asking for each stream's index, codec type, codec name and
    language tag in JSON form, echoes the command and its captured output,
    and returns the parsed ``streams`` list.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        A list of dicts, one per stream. Empty when the JSON produced by
        ffprobe has no ``streams`` entry.

    Raises:
        json.JSONDecodeError: If ffprobe's stdout is not valid JSON.
    """
    command = [ffprobe,
               '-v', 'error',
               '-print_format', 'json',
               '-show_entries', 'stream=index,codec_type,codec_name:stream_tags=language',
               filepath]
    print(f'command: {command}')
    # Decode explicitly as UTF-8 and replace undecodable bytes: relying on
    # the locale codec can raise UnicodeDecodeError on non-ASCII output.
    result = subprocess.run(command, capture_output=True, text=True,
                            encoding='utf-8', errors='replace')
    stdout = result.stdout
    stderr = result.stderr
    print(f'stdout: {stdout}')
    print(f'stderr: {stderr}')
    data = json.loads(stdout)
    # The JSON object may lack a 'streams' key (for example when ffprobe
    # could not read the input); treat that as "no streams" instead of
    # failing with an opaque KeyError.
    return data.get('streams', [])
def get_audio_streams(streams, target_langs=('eng',)):
    """Select the audio streams whose language tag is one of *target_langs*.

    Args:
        streams: Iterable of stream dicts as returned by ``get_streams``
            (each may carry ``codec_type`` and a ``tags`` dict with a
            ``language`` entry).
        target_langs: Container of accepted language codes.

    Returns:
        A list of the matching stream dicts, in their original order.
        Audio streams that have no ``tags`` or no ``language`` tag are
        skipped rather than raising ``KeyError``.
    """
    selected = []
    for stream in streams:
        if stream.get('codec_type') != 'audio':
            continue
        # 'tags' and 'language' are optional in ffprobe output; a missing
        # tag must not abort processing of the remaining streams.
        language = (stream.get('tags') or {}).get('language')
        if language in target_langs:
            selected.append(stream)
    return selected
def extract_audio_file(input_path, audio_stream, output_path):
    """Extract one audio stream to a 44.1 kHz, 16-bit PCM audio file.

    Builds and runs an ffmpeg command that maps the stream identified by
    ``audio_stream['index']`` from *input_path*, drops video, resamples to
    44100 Hz and encodes as ``pcm_s16le`` into *output_path*. The command
    and ffmpeg's captured output are echoed to stdout.

    Args:
        input_path: Path of the source media file.
        audio_stream: Stream dict containing at least an ``index`` key.
        output_path: Path of the audio file to create.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status, so callers do
            not go on to use a missing or incomplete output file.
    """
    idx = audio_stream['index']
    command = [ffmpeg,
               # Never wait for interactive input (e.g. an overwrite
               # prompt); fail instead of hanging an unattended run.
               '-nostdin',
               '-i', input_path,
               '-map', f'0:{idx}',
               '-vn',
               '-ar', '44100',
               '-acodec', 'pcm_s16le', output_path]
    print(f'command: {command}')
    # Decode explicitly as UTF-8 and replace undecodable bytes: ffmpeg's log
    # can contain non-ASCII file names and metadata that the locale codec
    # may be unable to decode.
    result = subprocess.run(command, capture_output=True, text=True,
                            encoding='utf-8', errors='replace')
    stdout = result.stdout
    stderr = result.stderr
    print(f'stdout: {stdout}')
    print(f'stderr: {stderr}')
    if result.returncode != 0:
        raise RuntimeError(
            f'ffmpeg exited with status {result.returncode} '
            f'while extracting stream {idx} from {input_path}')
def transcribe(path):
    """Transcribe the audio file at *path* as English.

    Delegates to the module-level ``model`` and returns whatever its
    ``transcribe`` call produces.
    """
    return model.transcribe(path, language='en')
def process_file(path):
    """Create English subtitle files for every English audio stream of a file.

    For each selected audio stream (numbered from 1) the audio is extracted
    to a temporary file, transcribed, and written next to the input as
    ``<path>.eng.<n>.srt``. Failures are printed and do not propagate, so a
    directory walk can continue with the next file.

    Args:
        path: Path of the candidate media file.

    Returns:
        None. The file is skipped when it is itself an ``.srt`` file (any
        letter case) or when ``<path>.eng.1.srt`` already exists.
    """
    print(f'Processing file: {path}')
    # Compare case-insensitively so 'MOVIE.SRT' is also recognised as a
    # subtitle file and not handed to ffprobe.
    if path.lower().endswith('.srt'):
        print('srt file; skip')
        return
    if os.path.exists(f'{path}.eng.1.srt'):
        print('srt file already exists; skip')
        return
    tmp_file = '_tmp.wav'
    try:
        streams = get_streams(path)
        audio_streams = get_audio_streams(streams)
        for i, stream in enumerate(audio_streams):
            # Each stream is handled independently: one failing stream must
            # not prevent the remaining streams from being transcribed.
            try:
                if os.path.exists(tmp_file):
                    print(f'removing {tmp_file}')
                    os.remove(tmp_file)
                extract_audio_file(path, stream, tmp_file)
                res = transcribe(tmp_file)
                res.to_srt_vtt(f'{path}.eng.{i+1}.srt')
            except Exception as ex:
                print(ex)
    except Exception as ex:
        print(ex)
    finally:
        # Do not leave the extracted audio behind in the working directory
        # once this file is done (successfully or not).
        try:
            if os.path.exists(tmp_file):
                print(f'removing {tmp_file}')
                os.remove(tmp_file)
        except OSError as ex:
            print(ex)
def process_files(directory):
    """Run ``process_file`` on every file found anywhere below *directory*.

    The tree is traversed with ``os.walk``, so files in all nested
    sub-folders are visited, not only those directly inside *directory*.
    """
    for parent, _subdirs, entries in os.walk(directory):
        full_paths = (os.path.join(parent, entry) for entry in entries)
        for full_path in full_paths:
            process_file(full_path)
if __name__ == '__main__':
    # Take the folder to process from the first command-line argument when
    # one is given; otherwise fall back to the placeholder path, which can
    # still be edited in place as before.
    import sys
    target_dir = sys.argv[1] if len(sys.argv) > 1 else r'path_to_your_movie_folder'
    process_files(target_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment