Last active
November 15, 2022 09:15
-
-
Save kwea123/e76cf754c8e2bd65576720505a36a899 to your computer and use it in GitHub Desktop.
whisper.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"private_outputs": true, | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyPTN4AH8zHqNhbz9GoCD0SN", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kwea123/e76cf754c8e2bd65576720505a36a899/whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "NYwreAggjMRu" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Install Whisper from GitHub. %pip (rather than !pip) installs into the\n", | |
"# environment of the kernel that runs this notebook.\n", | |
"# NOTE(review): the install is unpinned and follows the repository's default\n", | |
"# branch; append @<tag-or-commit> to the URL for reproducible runs.\n", | |
"%pip install -q git+https://github.com/openai/whisper.git" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Attach Google Drive to this Colab runtime.\n", | |
"from google.colab import drive\n", | |
"\n", | |
"mount_point = '/content/drive/'\n", | |
"# force_remount=True asks for a fresh mount even if Drive is already attached.\n", | |
"drive.mount(mount_point, force_remount=True)" | |
], | |
"metadata": { | |
"id": "ixdvyG9Fnlpr" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import glob\n", | |
"import os\n", | |
"import subprocess\n", | |
"\n", | |
"# --- Settings ---------------------------------------------------------------\n", | |
"use_drive = True  # Read videos from Google Drive; set to False if they were uploaded to this runtime\n", | |
"folder = 'kouhaku2021'  # Drive only: folder holding the video(s), given relative to MyDrive\n", | |
"                        # (leave unchanged when the files were uploaded directly)\n", | |
"language = 'Chinese'  # Spoken language of the videos\n", | |
"translate = False  # Set to True to have the subtitles translated into English\n", | |
"\n", | |
"# --- Locate the videos and the output folder ---------------------------------\n", | |
"if use_drive:\n", | |
"    video_dir = os.path.join('/content/drive/MyDrive', folder)\n", | |
"else:\n", | |
"    video_dir = '.'\n", | |
"# Sorted so the videos are processed in a deterministic order.\n", | |
"vids = sorted(glob.glob(os.path.join(video_dir, '*.mp4')))\n", | |
"out_dir = os.path.join(video_dir, 'subtitles')\n", | |
"\n", | |
"if not vids:\n", | |
"    print(f'在 {video_dir} 找不到任何 .mp4 影片')\n", | |
"\n", | |
"# --- Run the whisper CLI once per video --------------------------------------\n", | |
"for vid in vids:\n", | |
"    print(f'處理 {vid} 中...')\n", | |
"    # The arguments are passed as a list (no shell involved), so paths that\n", | |
"    # contain spaces, quotes or other shell metacharacters reach whisper\n", | |
"    # unchanged instead of being split or interpreted.\n", | |
"    cmd = [\n", | |
"        'whisper', vid,\n", | |
"        '--language', language,\n", | |
"        '--model', 'medium',\n", | |
"        '--output_dir', out_dir,\n", | |
"        '--verbose', 'False',\n", | |
"        '--logprob_threshold', 'None',\n", | |
"        '--no_speech_threshold', '0.01',\n", | |
"    ]\n", | |
"    if translate:\n", | |
"        cmd += ['--task', 'translate']\n", | |
"    result = subprocess.run(cmd)\n", | |
"    # Report a failed run instead of silently moving on to the next video.\n", | |
"    if result.returncode != 0:\n", | |
"        print(f'處理 {vid} 失敗(結束代碼 {result.returncode})')" | |
], | |
"metadata": { | |
"id": "Lm7H6i7Vkn46" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "_le-Ud7dXmKg" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment