Skip to content

Instantly share code, notes, and snippets.

@Phate334
Last active August 5, 2023 08:25
Show Gist options
  • Save Phate334/3720b44b7ad20e25536930faa581100f to your computer and use it in GitHub Desktop.
Save Phate334/3720b44b7ad20e25536930faa581100f to your computer and use it in GitHub Desktop.
Faster Whisper + Google Translate
import os
import subprocess
from pathlib import Path
from tqdm import tqdm
from faster_whisper import WhisperModel
from googletrans import Translator
import pysubs2
import torch
# Name of the Whisper checkpoint handed to WhisperModel below.
whisper_size = "large-v2"
print('Loading model...')

# Without CUDA, run on the CPU with int8 weights; otherwise use the GPU
# with float16 weights.
if not torch.cuda.is_available():
    whisper_model = WhisperModel(whisper_size, device="cpu", compute_type="int8")
    device = torch.device("cpu")
else:
    whisper_model = WhisperModel(whisper_size, device="cuda", compute_type="float16")
    device = torch.device("cuda")
def transcribe_to_srt(file_name, language, model, condition_on_previous_text=True):
    """Transcribe an audio file and save the result as an SRT subtitle file.

    Args:
        file_name: Path of the audio file to transcribe. The subtitle file is
            written next to it, with the extension replaced by ``.srt``.
        language: Language code forwarded to ``model.transcribe`` (e.g. ``"ja"``).
        model: Object with a faster-whisper style ``transcribe`` method that
            returns ``(segments, info)``.
        condition_on_previous_text: Forwarded unchanged to ``model.transcribe``.

    Returns:
        The path of the written ``.srt`` file.

    Raises:
        AssertionError: If ``file_name`` does not exist.
    """
    assert os.path.exists(file_name), f"No {file_name} found in current path."
    file_basename = os.path.splitext(file_name)[0]
    torch.cuda.empty_cache()
    print('Transcribe in progress...')
    segments, info = model.transcribe(
        audio=file_name,
        beam_size=5,
        language=language,
        condition_on_previous_text=condition_on_previous_text,
        # faster-whisper only applies vad_parameters when the VAD filter is
        # switched on; without this flag the setting below is ignored.
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1000),
    )
    total_duration = round(info.duration, 2)
    results = []
    with tqdm(total=total_duration, unit=" seconds") as pbar:
        # Track the audio position reached so far so that gaps between
        # segments (silence) also advance the bar; summing only the segment
        # lengths would leave the bar short of the total.
        position = 0.0
        for s in segments:
            results.append({'start': s.start, 'end': s.end, 'text': s.text})
            advance = s.end - position
            if advance > 0:
                pbar.update(advance)
                position = s.end
    print('Transcription done')
    subs = pysubs2.load_from_whisper(results)
    srt_file_path = file_basename + '.srt'
    subs.save(srt_file_path)
    return srt_file_path
def translate_srt(srt_path: "str | Path", target_language: str) -> str:
    """Translate every line of an SRT file and save it under a new name.

    Args:
        srt_path: Path of the subtitle file to translate (string or
            ``pathlib.Path``).
        target_language: Language code passed to the translator as ``dest``
            (e.g. ``"zh-TW"``); it is also appended to the output file name.

    Returns:
        The path of the translated file as a string:
        ``<original name without extension>_<target_language>.srt``.
    """
    # Normalise to str so both str and Path inputs work with the string
    # operations below and the return type stays a plain string.
    srt_path = os.fspath(srt_path)
    translator = Translator()
    subs = pysubs2.load(srt_path)
    for line in tqdm(subs):
        # Nothing to translate on blank lines; skip the request entirely.
        if not line.text.strip():
            continue
        translation = translator.translate(line.text, dest=target_language)
        line.text = translation.text
    # splitext only strips an extension from the final path component, so a
    # dot in a directory name cannot truncate the path.
    output_path = os.path.splitext(srt_path)[0] + '_' + target_language + '.srt'
    subs.save(output_path, encoding='utf-8')
    return output_path

Install

!pip install faster-whisper
!pip install googletrans==3.1.0a0
!pip install pysubs2
@Phate334
Copy link
Author

Phate334 commented Aug 5, 2023

Usage

import os
import shutil
import subprocess
from pathlib import Path

# Folder scanned for *.mp4 files; all outputs are written next to each video.
storage_source = "/drive/MyDrive/Colab Notebooks/Whisper Youtube/"
for mp4 in Path(storage_source).glob('*.mp4'):
    wav_path = mp4.with_suffix(".wav")
    translated_srt_path = mp4.with_name(f"{mp4.stem}_translated.srt")

    # Extract 16 kHz mono 16-bit PCM audio. Arguments are passed as a list
    # (no shell) so file names with quotes or "$" are safe; -y overwrites a
    # stale wav instead of prompting, and check=True stops on ffmpeg failure.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", str(mp4),
            "-ar", "16000",
            "-ac", "1",
            "-c:a", "pcm_s16le",
            str(wav_path),
        ],
        check=True,
    )

    # Use the path the function reports rather than re-deriving it.
    srt_path = transcribe_to_srt(str(wav_path), "ja", whisper_model, condition_on_previous_text=True)
    translated_srt = translate_srt(srt_path, 'zh-TW')
    shutil.copy(translated_srt, translated_srt_path)

    # Delete the wav and the original (untranslated) srt file.
    os.remove(wav_path)
    os.remove(srt_path)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment