Last active
February 18, 2023 02:38
-
-
Save shivase/1d43934e635b1d3a9a5d6464becb88b6 to your computer and use it in GitHub Desktop.
whisperを使って、指定したフォルダ配下のmp4動画すべての字幕(srt形式)を出力する
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generate subtitles with whisper for every .mp4 video found (recursively)
# under the directory given as the first argument.
#
# Usage: <this script> DIRECTORY
#
# Prerequisites
#   * whisper installed via pip
#   * ffmpeg installed (via brew or similar)
#
# Model name (accuracy improves toward the right, at the cost of run time)
#   tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large
MODEL="tiny.en"
# Output format : txt,vtt,srt,tsv,json,all
OUTPUT_FORMAT="srt"
# Language of the audio : en, ja, ...
INPUT_LANGUAGE="en"
# Number of threads
THREADS=8
# Skip a video when its subtitle file already exists (true / false)
SKIP_SUBTITLE_FILE_EXIST=true

#######################################
# Tell whether a subtitle file already exists for a video.
# Globals:   OUTPUT_FORMAT (read)
# Arguments: $1 - path of the .mp4 file
# Returns:   0 if a subtitle file exists, 1 otherwise
#######################################
subtitle_exists() {
  local video=$1
  # Two candidate names are accepted: "<name>.mp4.<fmt>" (the name this script
  # has always checked) and "<name>.<fmt>" (extension stripped).
  # NOTE(review): which one whisper writes appears to depend on the installed
  # release - confirm against your whisper version.
  [[ -f "${video}.${OUTPUT_FORMAT}" || -f "${video%.mp4}.${OUTPUT_FORMAT}" ]]
}

#######################################
# Transcribe every .mp4 file below a directory, one after another.
# Globals:   MODEL, OUTPUT_FORMAT, INPUT_LANGUAGE, THREADS,
#            SKIP_SUBTITLE_FILE_EXIST (all read)
# Arguments: $1 - directory to search
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 when the batch ran, 2 when no directory was given,
#            1 when the directory or the whisper command is missing
#######################################
main() {
  local dir_path=${1:-}
  if [[ -z "$dir_path" ]]; then
    printf 'Usage: %s DIRECTORY\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -d "$dir_path" ]]; then
    printf 'error: not a directory: %s\n' "$dir_path" >&2
    return 1
  fi
  if ! command -v whisper >/dev/null 2>&1; then
    printf 'error: whisper command not found (install it with pip)\n' >&2
    return 1
  fi

  # Collect the videos once, NUL-delimited, so that names containing spaces
  # (or any other odd character) survive intact and the reported total always
  # matches the list that is processed. A read loop is used instead of mapfile
  # so this also works on older bash versions.
  local -a files=()
  local target
  while IFS= read -r -d '' target; do
    files+=("$target")
  done < <(find "$dir_path" -type f -name '*.mp4' -print0 | sort -z)

  local file_num=${#files[@]}
  local current_file_num=1
  local total_time=0
  local start_time end_time run_time base_name dir_name

  echo "start whisper ( total files are ${file_num} )"

  for target in "${files[@]}"; do
    start_time=$(date +%s)
    base_name=${target##*/}
    dir_name=$(dirname -- "$target")

    # Compare against the literal "true": a plain non-empty test would also
    # succeed for "false".
    if [[ "$SKIP_SUBTITLE_FILE_EXIST" == true ]] && subtitle_exists "$target"; then
      echo "WHISPER SKIP --- ${base_name}"
      current_file_num=$((current_file_num + 1))
      continue
    fi

    echo "WHISPER START (${current_file_num}/${file_num}) : ${base_name}"
    # A failure on one video is reported but does not abort the whole batch.
    if ! whisper --model "$MODEL" \
      --device cpu \
      --output_format "$OUTPUT_FORMAT" \
      --output_dir "$dir_name" \
      --fp16 False \
      --task transcribe \
      --verbose False \
      --threads "$THREADS" \
      --language "$INPUT_LANGUAGE" \
      "$target"; then
      printf 'warning: whisper failed for %s\n' "$target" >&2
    fi

    end_time=$(date +%s)
    run_time=$((end_time - start_time))
    total_time=$((total_time + run_time))
    current_file_num=$((current_file_num + 1))
    echo "WHISPER END ( exec time ${run_time} seconds )"
  done

  echo "Finished!!"
  echo "Total time : ${total_time} seconds"
}

# main's return status becomes the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment