Skip to content

Instantly share code, notes, and snippets.

@kwea123
Last active November 15, 2022 09:15
Show Gist options
  • Save kwea123/e76cf754c8e2bd65576720505a36a899 to your computer and use it in GitHub Desktop.
Save kwea123/e76cf754c8e2bd65576720505a36a899 to your computer and use it in GitHub Desktop.
whisper.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"private_outputs": true,
"collapsed_sections": [],
"authorship_tag": "ABX9TyPTN4AH8zHqNhbz9GoCD0SN",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kwea123/e76cf754c8e2bd65576720505a36a899/whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NYwreAggjMRu"
},
"outputs": [],
"source": [
"# Install whisper from its GitHub repository.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install git+https://github.com/openai/whisper.git"
]
},
{
"cell_type": "code",
"source": [
"# Make Google Drive available under /content/drive/ (a fresh mount is requested on every run)\n",
"from google.colab import drive\n",
"drive.mount(mountpoint='/content/drive/', force_remount=True)"
],
"metadata": {
"id": "ixdvyG9Fnlpr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import glob\n",
"import os\n",
"import subprocess\n",
"\n",
"# ---- Settings ----\n",
"use_drive = True  # Whether the videos are on Google Drive; set to False (without quotes) if they were uploaded to this runtime\n",
"folder = 'kouhaku2021'  # With Google Drive: folder containing the videos (may contain several), as a path relative to MyDrive\n",
"                        # No need to change this when uploading files\n",
"language = 'Chinese'  # Language spoken in the videos\n",
"translate = False  # Whether to translate the subtitles into English; set to True (without quotes) to translate\n",
"\n",
"# Directory searched for *.mp4 files; subtitles go into its 'subtitles' subfolder.\n",
"if use_drive:\n",
"    base_dir = os.path.join('/content/drive/MyDrive', folder)\n",
"else:\n",
"    base_dir = '.'\n",
"# The order of glob results depends on the file system; sort for a predictable processing order.\n",
"vids = sorted(glob.glob(os.path.join(base_dir, '*.mp4')))\n",
"out_dir = os.path.join(base_dir, 'subtitles')\n",
"\n",
"if not vids:\n",
"    # Make a wrong folder name visible instead of silently doing nothing.\n",
"    print(f'在 {base_dir} 中找不到任何 .mp4 影片')\n",
"\n",
"for vid in vids:\n",
"    print(f'處理 {vid} 中...')\n",
"    # Pass the arguments as a list with no shell in between, so paths containing\n",
"    # spaces, quotes or shell metacharacters reach whisper unchanged.\n",
"    cmd = [\n",
"        'whisper', vid,\n",
"        '--language', language,\n",
"        '--model', 'medium',\n",
"        '--output_dir', out_dir,\n",
"        '--verbose', 'False',\n",
"        '--logprob_threshold', 'None',\n",
"        '--no_speech_threshold', '0.01',\n",
"    ]\n",
"    if translate:\n",
"        cmd += ['--task', 'translate']\n",
"    result = subprocess.run(cmd)\n",
"    if result.returncode != 0:\n",
"        # Report the failure and carry on with the remaining videos.\n",
"        print(f'處理 {vid} 失敗 (exit code {result.returncode})')"
],
"metadata": {
"id": "Lm7H6i7Vkn46"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "_le-Ud7dXmKg"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment