Created
December 24, 2025 12:07
-
-
Save chottokun/f653cfdb9144f325ca878adeb6bd967d to your computer and use it in GitHub Desktop.
whisper+stable-ts.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyPoeaafoGqd8OkS5JbP32yG", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/chottokun/f653cfdb9144f325ca878adeb6bd967d/whisper-stable-ts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "08b4c072" | |
| }, | |
| "source": [ | |
| "# `stable-ts` と `faster-whisper` を用いた音声認識環境\n", | |
| "\n", | |
| "ご提示いただいた設定に基づき、`stable-ts` と `faster-whisper` を用いた音声認識環境を構築します。\n", | |
| "\n", | |
| "**⚠️ 重要: 実行前に [ランタイム] > [ランタイムのタイプを変更] から「ハードウェアアクセラレータ」を「T4 GPU」に設定してください。**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "11901012", | |
| "outputId": "62a6e2ef-a837-4f01-a0a1-6b48a3ac43ee" | |
| }, | |
| "source": [ | |
| "# @title 1. ใฉใคใใฉใชใฎใคใณในใใผใซ\n", | |
| "# stable-ts ใจ faster-whisper ใใคใณในใใผใซใใพใ\n", | |
| "!pip install stable-ts faster-whisper\n", | |
| "\n", | |
| "import torch\n", | |
| "import stable_whisper\n", | |
| "from google.colab import files\n", | |
| "\n", | |
| "# GPUใไฝฟใใใ็ขบ่ช\n", | |
| "if torch.cuda.is_available():\n", | |
| " print(\"\\nโ GPU (CUDA) ใๅฉ็จๅฏ่ฝใงใใ้ซ้ๆจ่ซใขใผใใงๅไฝใใพใใ\")\n", | |
| "else:\n", | |
| " print(\"\\nโ ๏ธ GPUใๆคๅบใใใพใใใงใใใใฉใณใฟใคใ ใฎ่จญๅฎใใGPUใๆๅนใซใใฆใใ ใใใCPUใงใฏ้ๅธธใซ้ ใใชใใพใใ\")" | |
| ], | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Requirement already satisfied: stable-ts in /usr/local/lib/python3.12/dist-packages (2.19.1)\n", | |
| "Requirement already satisfied: faster-whisper in /usr/local/lib/python3.12/dist-packages (1.2.1)\n", | |
| "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from stable-ts) (2.0.2)\n", | |
| "Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (from stable-ts) (2.9.0+cu126)\n", | |
| "Requirement already satisfied: torchaudio in /usr/local/lib/python3.12/dist-packages (from stable-ts) (2.9.0+cu126)\n", | |
| "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from stable-ts) (4.67.1)\n", | |
| "Requirement already satisfied: openai-whisper<=20250625,>=20230314 in /usr/local/lib/python3.12/dist-packages (from stable-ts) (20250625)\n", | |
| "Requirement already satisfied: ctranslate2<5,>=4.0 in /usr/local/lib/python3.12/dist-packages (from faster-whisper) (4.6.2)\n", | |
| "Requirement already satisfied: huggingface-hub>=0.21 in /usr/local/lib/python3.12/dist-packages (from faster-whisper) (0.36.0)\n", | |
| "Requirement already satisfied: tokenizers<1,>=0.13 in /usr/local/lib/python3.12/dist-packages (from faster-whisper) (0.22.1)\n", | |
| "Requirement already satisfied: onnxruntime<2,>=1.14 in /usr/local/lib/python3.12/dist-packages (from faster-whisper) (1.23.2)\n", | |
| "Requirement already satisfied: av>=11 in /usr/local/lib/python3.12/dist-packages (from faster-whisper) (16.0.1)\n", | |
| "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (75.2.0)\n", | |
| "Requirement already satisfied: pyyaml<7,>=5.3 in /usr/local/lib/python3.12/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (6.0.3)\n", | |
| "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (3.20.0)\n", | |
| "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (2025.3.0)\n", | |
| "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (25.0)\n", | |
| "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (2.32.4)\n", | |
| "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (4.15.0)\n", | |
| "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21->faster-whisper) (1.2.0)\n", | |
| "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.12/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (15.0.1)\n", | |
| "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.12/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (25.9.23)\n", | |
| "Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (5.29.5)\n", | |
| "Requirement already satisfied: sympy in /usr/local/lib/python3.12/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (1.14.0)\n", | |
| "Requirement already satisfied: more-itertools in /usr/local/lib/python3.12/dist-packages (from openai-whisper<=20250625,>=20230314->stable-ts) (10.8.0)\n", | |
| "Requirement already satisfied: numba in /usr/local/lib/python3.12/dist-packages (from openai-whisper<=20250625,>=20230314->stable-ts) (0.60.0)\n", | |
| "Requirement already satisfied: tiktoken in /usr/local/lib/python3.12/dist-packages (from openai-whisper<=20250625,>=20230314->stable-ts) (0.12.0)\n", | |
| "Requirement already satisfied: triton>=2 in /usr/local/lib/python3.12/dist-packages (from openai-whisper<=20250625,>=20230314->stable-ts) (3.5.0)\n", | |
| "Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (3.6.1)\n", | |
| "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (3.1.6)\n", | |
| "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.77)\n", | |
| "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.77)\n", | |
| "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.80)\n", | |
| "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (9.10.2.21)\n", | |
| "Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.4.1)\n", | |
| "Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (11.3.0.4)\n", | |
| "Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (10.3.7.77)\n", | |
| "Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (11.7.1.2)\n", | |
| "Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.5.4.2)\n", | |
| "Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (0.7.1)\n", | |
| "Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (2.27.5)\n", | |
| "Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (3.3.20)\n", | |
| "Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.77)\n", | |
| "Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (12.6.85)\n", | |
| "Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch->stable-ts) (1.11.1.6)\n", | |
| "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy->onnxruntime<2,>=1.14->faster-whisper) (1.3.0)\n", | |
| "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.12/dist-packages (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper) (10.0)\n", | |
| "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch->stable-ts) (3.0.3)\n", | |
| "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.12/dist-packages (from numba->openai-whisper<=20250625,>=20230314->stable-ts) (0.43.0)\n", | |
| "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21->faster-whisper) (3.4.4)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21->faster-whisper) (3.11)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21->faster-whisper) (2.5.0)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21->faster-whisper) (2025.11.12)\n", | |
| "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.12/dist-packages (from tiktoken->openai-whisper<=20250625,>=20230314->stable-ts) (2025.11.3)\n", | |
| "\n", | |
| "โ GPU (CUDA) ใๅฉ็จๅฏ่ฝใงใใ้ซ้ๆจ่ซใขใผใใงๅไฝใใพใใ\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 1000 | |
| }, | |
| "id": "d1242fbe", | |
| "outputId": "1922227e-ec58-4cf4-a5cb-0fb65b91fca5" | |
| }, | |
| "source": [ | |
| "# @title 2. ้ณๅฃฐใใกใคใซใใขใใใญใผใใใฆๅฎ่ก\n", | |
| "# ใขใใซใฎใญใผใ๏ผๅๅใฏใใฆใณใญใผใใซๆ้ใใใใใพใ๏ผ\n", | |
| "print(\"โณ ใขใใซ(large-v3-turbo)ใใญใผใไธญ...\")\n", | |
| "model = stable_whisper.load_faster_whisper(\n", | |
| " \"large-v3-turbo\",\n", | |
| " device=\"cuda\" if torch.cuda.is_available() else \"cpu\",\n", | |
| " compute_type=\"float16\" if torch.cuda.is_available() else \"int8\"\n", | |
| ")\n", | |
| "print(\"โ ใขใใซใญใผใๅฎไบ\")\n", | |
| "\n", | |
| "# ้ณๅฃฐใใกใคใซใฎใขใใใญใผใ\n", | |
| "print(\"\\n๐ ้ณๅฃฐใใกใคใซ(mp3, wav, m4a็ญ)ใใขใใใญใผใใใฆใใ ใใ:\")\n", | |
| "uploaded = files.upload()\n", | |
| "\n", | |
| "for filename in uploaded.keys():\n", | |
| " print(f\"\\n๐๏ธ ่งฃๆไธญ: {filename} ...\")\n", | |
| "\n", | |
| " #\n", | |
| " result = model.transcribe(\n", | |
| " filename,\n", | |
| " language=\"ja\",\n", | |
| "\n", | |
| " #\n", | |
| " vad=True, # VAD(้ณๅฃฐๅบ้ๆคๅบ)ใๆๅนๅ\n", | |
| " condition_on_previous_text=False,# ๅใฎๆ่ใซไพๅญใใชใ๏ผใซใผใๅฏพ็ญใฎ่ฆ๏ผ\n", | |
| " word_timestamps=False, # ๅ่ชใใจใฎใฟใคใ ในใฟใณใใไฝฟใใชใ๏ผๅฎๅฎๅ๏ผ\n", | |
| " beam_size=5 # ๆข็ดขๅน ๏ผ็ฒพๅบฆๅไธใฎใใ๏ผ\n", | |
| " )\n", | |
| "\n", | |
| " # ็ตๆใฎ่กจ็คบ\n", | |
| " print(\"\\n\" + \"=\"*30 + \" ่ช่ญ็ตๆ \" + \"=\"*30)\n", | |
| " for segment in result.segments:\n", | |
| " # ็งๆฐใๆดๅฝขใใฆ่กจ็คบ\n", | |
| " start = f\"{segment.start:.2f}\"\n", | |
| " end = f\"{segment.end:.2f}\"\n", | |
| " print(f\"[{start}s -> {end}s] {segment.text}\")\n", | |
| " print(\"=\"*68)\n", | |
| "\n", | |
| " # SRTใใกใคใซ๏ผๅญๅนใใกใคใซ๏ผใจใใฆไฟๅญใปใใฆใณใญใผใ\n", | |
| " srt_filename = filename.rsplit('.', 1)[0] + \".srt\"\n", | |
| " result.to_srt_vtt(srt_filename)\n", | |
| " files.download(srt_filename)\n", | |
| " print(f\"\\n๐พ ๅญๅนใใกใคใซ({srt_filename})ใใใฆใณใญใผใใใพใใใ\")" | |
| ], | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "โณ ใขใใซ(large-v3-turbo)ใใญใผใไธญ...\n", | |
| "โ ใขใใซใญใผใๅฎไบ\n", | |
| "\n", | |
| "๐ ้ณๅฃฐใใกใคใซ(mp3, wav, m4a็ญ)ใใขใใใญใผใใใฆใใ ใใ:\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <input type=\"file\" id=\"files-89316340-2eac-4148-a9fc-dbf16a097068\" name=\"files[]\" multiple disabled\n", | |
| " style=\"border:none\" />\n", | |
| " <output id=\"result-89316340-2eac-4148-a9fc-dbf16a097068\">\n", | |
| " Upload widget is only available when the cell has been executed in the\n", | |
| " current browser session. Please rerun this cell to enable.\n", | |
| " </output>\n", | |
| " <script>// Copyright 2017 Google LLC\n", | |
| "//\n", | |
| "// Licensed under the Apache License, Version 2.0 (the \"License\");\n", | |
| "// you may not use this file except in compliance with the License.\n", | |
| "// You may obtain a copy of the License at\n", | |
| "//\n", | |
| "// http://www.apache.org/licenses/LICENSE-2.0\n", | |
| "//\n", | |
| "// Unless required by applicable law or agreed to in writing, software\n", | |
| "// distributed under the License is distributed on an \"AS IS\" BASIS,\n", | |
| "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", | |
| "// See the License for the specific language governing permissions and\n", | |
| "// limitations under the License.\n", | |
| "\n", | |
| "/**\n", | |
| " * @fileoverview Helpers for google.colab Python module.\n", | |
| " */\n", | |
| "(function(scope) {\n", | |
| "function span(text, styleAttributes = {}) {\n", | |
| " const element = document.createElement('span');\n", | |
| " element.textContent = text;\n", | |
| " for (const key of Object.keys(styleAttributes)) {\n", | |
| " element.style[key] = styleAttributes[key];\n", | |
| " }\n", | |
| " return element;\n", | |
| "}\n", | |
| "\n", | |
| "// Max number of bytes which will be uploaded at a time.\n", | |
| "const MAX_PAYLOAD_SIZE = 100 * 1024;\n", | |
| "\n", | |
| "function _uploadFiles(inputId, outputId) {\n", | |
| " const steps = uploadFilesStep(inputId, outputId);\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " // Cache steps on the outputElement to make it available for the next call\n", | |
| " // to uploadFilesContinue from Python.\n", | |
| " outputElement.steps = steps;\n", | |
| "\n", | |
| " return _uploadFilesContinue(outputId);\n", | |
| "}\n", | |
| "\n", | |
| "// This is roughly an async generator (not supported in the browser yet),\n", | |
| "// where there are multiple asynchronous steps and the Python side is going\n", | |
| "// to poll for completion of each step.\n", | |
| "// This uses a Promise to block the python side on completion of each step,\n", | |
| "// then passes the result of the previous step as the input to the next step.\n", | |
| "function _uploadFilesContinue(outputId) {\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " const steps = outputElement.steps;\n", | |
| "\n", | |
| " const next = steps.next(outputElement.lastPromiseValue);\n", | |
| " return Promise.resolve(next.value.promise).then((value) => {\n", | |
| " // Cache the last promise value to make it available to the next\n", | |
| " // step of the generator.\n", | |
| " outputElement.lastPromiseValue = value;\n", | |
| " return next.value.response;\n", | |
| " });\n", | |
| "}\n", | |
| "\n", | |
| "/**\n", | |
| " * Generator function which is called between each async step of the upload\n", | |
| " * process.\n", | |
| " * @param {string} inputId Element ID of the input file picker element.\n", | |
| " * @param {string} outputId Element ID of the output display.\n", | |
| " * @return {!Iterable<!Object>} Iterable of next steps.\n", | |
| " */\n", | |
| "function* uploadFilesStep(inputId, outputId) {\n", | |
| " const inputElement = document.getElementById(inputId);\n", | |
| " inputElement.disabled = false;\n", | |
| "\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " outputElement.innerHTML = '';\n", | |
| "\n", | |
| " const pickedPromise = new Promise((resolve) => {\n", | |
| " inputElement.addEventListener('change', (e) => {\n", | |
| " resolve(e.target.files);\n", | |
| " });\n", | |
| " });\n", | |
| "\n", | |
| " const cancel = document.createElement('button');\n", | |
| " inputElement.parentElement.appendChild(cancel);\n", | |
| " cancel.textContent = 'Cancel upload';\n", | |
| " const cancelPromise = new Promise((resolve) => {\n", | |
| " cancel.onclick = () => {\n", | |
| " resolve(null);\n", | |
| " };\n", | |
| " });\n", | |
| "\n", | |
| " // Wait for the user to pick the files.\n", | |
| " const files = yield {\n", | |
| " promise: Promise.race([pickedPromise, cancelPromise]),\n", | |
| " response: {\n", | |
| " action: 'starting',\n", | |
| " }\n", | |
| " };\n", | |
| "\n", | |
| " cancel.remove();\n", | |
| "\n", | |
| " // Disable the input element since further picks are not allowed.\n", | |
| " inputElement.disabled = true;\n", | |
| "\n", | |
| " if (!files) {\n", | |
| " return {\n", | |
| " response: {\n", | |
| " action: 'complete',\n", | |
| " }\n", | |
| " };\n", | |
| " }\n", | |
| "\n", | |
| " for (const file of files) {\n", | |
| " const li = document.createElement('li');\n", | |
| " li.append(span(file.name, {fontWeight: 'bold'}));\n", | |
| " li.append(span(\n", | |
| " `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n", | |
| " `last modified: ${\n", | |
| " file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n", | |
| " 'n/a'} - `));\n", | |
| " const percent = span('0% done');\n", | |
| " li.appendChild(percent);\n", | |
| "\n", | |
| " outputElement.appendChild(li);\n", | |
| "\n", | |
| " const fileDataPromise = new Promise((resolve) => {\n", | |
| " const reader = new FileReader();\n", | |
| " reader.onload = (e) => {\n", | |
| " resolve(e.target.result);\n", | |
| " };\n", | |
| " reader.readAsArrayBuffer(file);\n", | |
| " });\n", | |
| " // Wait for the data to be ready.\n", | |
| " let fileData = yield {\n", | |
| " promise: fileDataPromise,\n", | |
| " response: {\n", | |
| " action: 'continue',\n", | |
| " }\n", | |
| " };\n", | |
| "\n", | |
| " // Use a chunked sending to avoid message size limits. See b/62115660.\n", | |
| " let position = 0;\n", | |
| " do {\n", | |
| " const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n", | |
| " const chunk = new Uint8Array(fileData, position, length);\n", | |
| " position += length;\n", | |
| "\n", | |
| " const base64 = btoa(String.fromCharCode.apply(null, chunk));\n", | |
| " yield {\n", | |
| " response: {\n", | |
| " action: 'append',\n", | |
| " file: file.name,\n", | |
| " data: base64,\n", | |
| " },\n", | |
| " };\n", | |
| "\n", | |
| " let percentDone = fileData.byteLength === 0 ?\n", | |
| " 100 :\n", | |
| " Math.round((position / fileData.byteLength) * 100);\n", | |
| " percent.textContent = `${percentDone}% done`;\n", | |
| "\n", | |
| " } while (position < fileData.byteLength);\n", | |
| " }\n", | |
| "\n", | |
| " // All done.\n", | |
| " yield {\n", | |
| " response: {\n", | |
| " action: 'complete',\n", | |
| " }\n", | |
| " };\n", | |
| "}\n", | |
| "\n", | |
| "scope.google = scope.google || {};\n", | |
| "scope.google.colab = scope.google.colab || {};\n", | |
| "scope.google.colab._files = {\n", | |
| " _uploadFiles,\n", | |
| " _uploadFilesContinue,\n", | |
| "};\n", | |
| "})(self);\n", | |
| "</script> " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Saving n0001a-The-Ant-and-Grasshopper.mp3 to n0001a-The-Ant-and-Grasshopper (1).mp3\n", | |
| "\n", | |
| "๐๏ธ ่งฃๆไธญ: n0001a-The-Ant-and-Grasshopper (1).mp3 ...\n", | |
| "Detected Language: japanese\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "Transcribe: 100%|โโโโโโโโโโ| 165.88/165.88 [00:04<00:00, 39.68sec/s]\n", | |
| "VAD: 100%|โโโโโโโโโโ| 165.88/165.88 [00:02<00:00, 62.77sec/s]\n", | |
| "Adjustment: 100%|โโโโโโโโโโ| 194.0/194.0 [00:00<00:00, 107503.63sec/s]" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "============================== ่ช่ญ็ตๆ ==============================\n", | |
| "[2.46s -> 6.22s] ๆฅๆฌ่ชๅค่ชญใใใฏใน ใฌใใซใผใญ\n", | |
| "[6.22s -> 14.00s] ใขใชใจใญใชใฎใชใน ใคใฝใใ็ฉ่ชใใ\n", | |
| "[15.68s -> 17.88s] NPO ๅค่จ่ชๅค่ชญ\n", | |
| "[22.43s -> 23.18s] ๅคใงใ\n", | |
| "[23.18s -> 27.52s] ใญใชใฎใชในใๆจใฎไธใซใใพใ\n", | |
| "[29.66s -> 31.90s] ใญใชใฎใชในใฏใใใใใใใพใ\n", | |
| "[33.60s -> 34.58s] ใฉใฉใฉใฉใฉ\n", | |
| "[38.11s -> 39.26s] ใขใชใใใพใใ\n", | |
| "[40.96s -> 43.54s] ใขใชใฏใในใใฎใใฏใใณใพใ\n", | |
| "[45.47s -> 47.18s] ใญใชใฎใชในใใใใพใใ\n", | |
| "[47.18s -> 52.84s] ใขใชใใใใใฃใใใซใใใใใใใพใใใ\n", | |
| "[55.30s -> 56.62s] ใขใชใฏใใใพใใ\n", | |
| "[56.62s -> 60.90s] ใใใใใใใใพใใ\n", | |
| "[62.62s -> 64.78s] ใใใใใกใฏใฏใใใใพใ\n", | |
| "[68.03s -> 69.88s] ใญใชใฎใชในใฏใใใพใใ\n", | |
| "[71.84s -> 73.02s] ใฉใใใฆใงใใ\n", | |
| "[73.02s -> 78.38s] ใตใใฏใในใใฎใใใใพใใใใ\n", | |
| "[80.03s -> 83.38s] ใใใใพใฏใชใคใงใใ\n", | |
| "[85.25s -> 87.10s] ใญใชใฎใชในใฏ็ฌใใพใใ\n", | |
| "[89.95s -> 91.56s] ใใใใๆฏๆฅ\n", | |
| "[91.56s -> 95.86s] ใญใชใฎใชในใฏๆญใๆญใใพใใ\n", | |
| "[98.43s -> 99.96s] ๅใใพใใใงใใ\n", | |
| "[103.90s -> 104.60s] ๅฌใงใ\n", | |
| "[106.14s -> 107.38s] ้ชใ้ใใพใ\n", | |
| "[109.31s -> 110.72s] ใจใฆใๅฏใใงใ\n", | |
| "[110.72s -> 116.78s] ใญใชใฎใชในใฎๅฎถใซใฏ้ฃใน็ฉใใใใพใใ\n", | |
| "[116.78s -> 125.52s] ใขใชใฎๅฎถใซใฏ้ฃใน็ฉใใใใใใใใพใ\n", | |
| "[125.52s -> 131.12s] ใญใชใฎใชในใฏใขใชใฎๅฎถใธ่กใใพใใ\n", | |
| "[134.75s -> 136.46s] ใญใชใฎใชในใฏ่จใใพใใ\n", | |
| "[136.46s -> 144.54s] ใขใชใใใใ้กใใงใใ้ฃใน็ฉใใใ ใใ\n", | |
| "[147.33s -> 148.64s] ใขใชใฏ่จใใพใใ\n", | |
| "[148.64s -> 154.26s] ็งใใกใฏๅคๅใใพใใ\n", | |
| "[154.26s -> 158.58s] ใ ใใ้ฃใน็ฉใใใใพใ\n", | |
| "[160.54s -> 164.02s] ใใชใใฏๅคไฝใใใพใใใ\n", | |
| "[165.88s -> 194.00s] ใ่ฆ่ดใใใใจใใใใใพใใ\n", | |
| "====================================================================\n", | |
| "Saved: /content/n0001a-The-Ant-and-Grasshopper (1).srt\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "\n", | |
| "/usr/local/lib/python3.12/dist-packages/stable_whisper/text_output.py:183: UserWarning: Result is missing word timestamps. Word-level timing cannot be exported. Use ``word_level=False`` to avoid this warning\n", | |
| " warnings.warn('Result is missing word timestamps. Word-level timing cannot be exported. '\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "\n", | |
| " async function download(id, filename, size) {\n", | |
| " if (!google.colab.kernel.accessAllowed) {\n", | |
| " return;\n", | |
| " }\n", | |
| " const div = document.createElement('div');\n", | |
| " const label = document.createElement('label');\n", | |
| " label.textContent = `Downloading \"${filename}\": `;\n", | |
| " div.appendChild(label);\n", | |
| " const progress = document.createElement('progress');\n", | |
| " progress.max = size;\n", | |
| " div.appendChild(progress);\n", | |
| " document.body.appendChild(div);\n", | |
| "\n", | |
| " const buffers = [];\n", | |
| " let downloaded = 0;\n", | |
| "\n", | |
| " const channel = await google.colab.kernel.comms.open(id);\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| "\n", | |
| " for await (const message of channel.messages) {\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| " if (message.buffers) {\n", | |
| " for (const buffer of message.buffers) {\n", | |
| " buffers.push(buffer);\n", | |
| " downloaded += buffer.byteLength;\n", | |
| " progress.value = downloaded;\n", | |
| " }\n", | |
| " }\n", | |
| " }\n", | |
| " const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
| " const a = document.createElement('a');\n", | |
| " a.href = window.URL.createObjectURL(blob);\n", | |
| " a.download = filename;\n", | |
| " div.appendChild(a);\n", | |
| " a.click();\n", | |
| " div.remove();\n", | |
| " }\n", | |
| " " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "download(\"download_1811b4a0-ba20-4fa6-8ecd-601b4484dde4\", \"n0001a-The-Ant-and-Grasshopper (1).srt\", 2375)" | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "๐พ ๅญๅนใใกใคใซ(n0001a-The-Ant-and-Grasshopper (1).srt)ใใใฆใณใญใผใใใพใใใ\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "DDnKDNqlNn5H" | |
| }, | |
| "execution_count": 8, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "210e4e37" | |
| }, | |
| "source": [ | |
| "# @title 3. ใตใณใใซ้ณๅฃฐใงใฎใในใ\n", | |
| "# ๆๅฎใใใURLใใ้ณๅฃฐใจๆญฃ่งฃใใญในใใใใฆใณใญใผใใใฆๆๅญ่ตทใใใ่กใใพใใ" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 1000 | |
| }, | |
| "id": "50542827", | |
| "outputId": "51668824-2c31-4358-82fb-1657f218f709" | |
| }, | |
| "source": [ | |
| "import os\n", | |
| "\n", | |
| "# ใใกใคใซใฎใใฆใณใญใผใ\n", | |
| "audio_url = \"https://clrd.ninjal.ac.jp/csj/sound-f/interview_aps-smp.mp3\"\n", | |
| "text_url = \"https://clrd.ninjal.ac.jp/csj/trans-f/interview_aps-smp.txt\"\n", | |
| "audio_file = \"interview_aps-smp.mp3\"\n", | |
| "text_file = \"interview_aps-smp.txt\"\n", | |
| "\n", | |
| "!wget -q -O {audio_file} {audio_url}\n", | |
| "!wget -q -O {text_file} {text_url}\n", | |
| "\n", | |
| "print(f\"โ ใใฆใณใญใผใๅฎไบ: {audio_file}, {text_file}\")\n", | |
| "\n", | |
| "# ใขใใซใใญใผใใใใฆใใชใๅ ดๅใฏใญใผใใใ๏ผๅใฎใปใซใๅฎ่กใใฆใใชใๅ ดๅ็จ๏ผ\n", | |
| "if 'model' not in locals():\n", | |
| " print(\"โณ ใขใใซ(large-v3-turbo)ใใญใผใไธญ...\")\n", | |
| " model = stable_whisper.load_faster_whisper(\n", | |
| " \"large-v3-turbo\",\n", | |
| " device=\"cuda\" if torch.cuda.is_available() else \"cpu\",\n", | |
| " compute_type=\"float16\" if torch.cuda.is_available() else \"int8\"\n", | |
| " )\n", | |
| "\n", | |
| "# ๆๅญ่ตทใใๅฎ่ก\n", | |
| "print(f\"\\n๐๏ธ ่งฃๆไธญ: {audio_file} ...\")\n", | |
| "result = model.transcribe(\n", | |
| " audio_file,\n", | |
| " language=\"ja\",\n", | |
| " vad=True,\n", | |
| " condition_on_previous_text=False,\n", | |
| " word_timestamps=False,\n", | |
| " beam_size=5\n", | |
| ")\n", | |
| "\n", | |
| "# ็ตๆ่กจ็คบ\n", | |
| "print(\"\\n\" + \"=\"*30 + \" Whisper ่ช่ญ็ตๆ \" + \"=\"*30)\n", | |
| "for segment in result.segments:\n", | |
| " print(f\"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\")\n", | |
| "\n", | |
| "print(\"\\n\" + \"=\"*30 + \" ๆญฃ่งฃใใผใฟ (ๅ่) \" + \"=\"*30)\n", | |
| "# ใจใณใณใผใใฃใณใฐใ่ชๅๅคๅฅใใฆ่กจ็คบ๏ผShift_JISใชใฉใฎๅฏ่ฝๆงใใใใใ๏ผ\n", | |
| "encodings = ['utf-8', 'shift_jis', 'euc-jp']\n", | |
| "reference_text = \"\"\n", | |
| "for enc in encodings:\n", | |
| " try:\n", | |
| " with open(text_file, 'r', encoding=enc) as f:\n", | |
| " reference_text = f.read()\n", | |
| " break\n", | |
| " except UnicodeDecodeError:\n", | |
| " continue\n", | |
| "\n", | |
| "print(reference_text[:2000] + (\"...\" if len(reference_text) > 2000 else \"\")) # ้ทใใใๅ ดๅใฏ็็ฅ\n", | |
| "\n", | |
| "# SRTไฟๅญ\n", | |
| "srt_filename = audio_file.rsplit('.', 1)[0] + \".srt\"\n", | |
| "result.to_srt_vtt(srt_filename)\n", | |
| "files.download(srt_filename)\n", | |
| "print(f\"\\n๐พ ๅญๅนใใกใคใซ({srt_filename})ใใใฆใณใญใผใใใพใใใ\")" | |
| ], | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "โ ใใฆใณใญใผใๅฎไบ: interview_aps-smp.mp3, interview_aps-smp.txt\n", | |
| "\n", | |
| "๐๏ธ ่งฃๆไธญ: interview_aps-smp.mp3 ...\n", | |
| "Detected Language: japanese\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "Transcribe: 100%|โโโโโโโโโโ| 71.75/71.75 [00:02<00:00, 32.79sec/s] \n", | |
| "VAD: 100%|โโโโโโโโโโ| 71.75/71.75 [00:01<00:00, 64.04sec/s]\n", | |
| "Adjustment: 100%|โโโโโโโโโโ| 71.14/71.14 [00:00<00:00, 238324.91sec/s]" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "============================== Whisper ่ช่ญ็ตๆ ==============================\n", | |
| "[0.35s -> 12.24s] ่ณชๅใใใฆใใใ ใใพใใ่ชญใใ ใใงใใใใฉใใ็่งฃใซใฏ้ ใๅใฐใใจใใๆใใงใ่จ่ใฎๆๅณใใใ่ใใใใใจๆใใใงใใใฉใ\n", | |
| "[12.80s -> 20.68s] ใใฉ่จ่ชๆ ๅ ฑใฃใฆใใ่จ่ใจใใใจใใฉใซใใณใใใใจ่ด้ณๅใฃใฆใใใใจใซใคใใฆใพใใฏใใใซ่ใใใฆใใ ใใใ\n", | |
| "[20.68s -> 25.56s] 3ใคใญใ่จ่ชใฃใฆใฎใฏใใใใพใใใญใ่จ่ใงใใใญใ\n", | |
| "[25.96s -> 33.34s] ่จ่ชๆ ๅ ฑใฃใฆใใใฎใฏใญใ็ฐกๅใซ่จใใฐๅ่ชใฎๆๅณใ่พๆธใซๆธใใฆใใใพใใใญใ\n", | |
| "[33.34s -> 44.18s] ใใใใๅ่ชใใใฃใคใใใจใใซใใใฃใคใใๆใไฝใฃใใใใใจใใซใๅ่ฉใซๅฉ่ฉใใใฃใคใใฆใๅ่ฉใใใฃใฆใๆๅพใซๅฉๅ่ฉใใใฃใฆใ\n", | |
| "[44.70s -> 49.88s] ใใใใๆฎ้ใซ่จ่ชๅญฆใฎๆ็งๆธใซๆธใใฆใใใใใชใใใใ่จ่ชๆ ๅ ฑใงใใญใ\n", | |
| "[49.88s -> 60.90s] ใงใใใฉใฃใฆ่จ่ใฏใญใ่ชๆบ็ใซใฏใญใใใฎใใชใใจใใฎๆจชใซใใจใใญใ้ฃใซใใจใใญใใใใใๆๅณใชใใงใใใ\n", | |
| "[62.50s -> 71.14s] ใงใใคใพใ่จ่ชใใใกใใฃใจใใใใจใใใซใใๆ ๅ ฑใใ ใใฉใใ ใใฉใใใฎๅฎ้ใซใฏๅญๅจใใฆใใๆ ๅ ฑใฃใฆใใใใจใงใใญใ\n", | |
| "\n", | |
| "============================== ๆญฃ่งฃใใผใฟ (ๅ่) ==============================\n", | |
| "%่ฌๆผID:D04M0041\n", | |
| "%\n", | |
| "%<SOT>\n", | |
| "%%ใ็ฅใ\n", | |
| "0003 00008.805-00012.085 L:\n", | |
| "่ณชๅใใใฆใใใ ใใพใ & ใทใใขใณใตใปใใคใฟใใญใใน \n", | |
| "(F ใใฎ) & (F ใขใ) \n", | |
| "่ชญใใ ใใงใใใใฉใ & ใจใณใใณใในใฑใฌใใข \n", | |
| "0004 00009.417-00009.838 R:\n", | |
| "(F ใใ) & (F <VN>) \n", | |
| "0005 00011.770-00012.901 R:\n", | |
| "(F ใใ) & (F <VN>) \n", | |
| "ๅใใใชใใฃใ & ใฏใซใฉใใซใใฟ \n", | |
| "0006 00012.536-00013.221 L:\n", | |
| "ๅคงๆตใฎ & ใฟใค(็ฌ ใใผใ) \n", | |
| "0007 00013.250-00014.315 R:<็ฌ>\n", | |
| "0008 00013.698-00016.817 L:\n", | |
| "(F ใใฎใผ) & (F ใขใใผ) \n", | |
| "็่งฃใซใฏ & ใชใซใคใใฏ \n", | |
| "้ ใ & ใใผใฏ \n", | |
| "ๅใฐใใจใใ & ใชใจใใบใใฆใผ \n", | |
| "ๆใใง & ใซใณใธใ \n", | |
| "0009 00017.156-00018.411 L:\n", | |
| "(F ใใฎใผ) & (F ใขใใผ) \n", | |
| "่จ่ใฎ & ใณใใใ \n", | |
| "ๆๅณ & ใคใ \n", | |
| "0010 00018.861-00020.915 L:\n", | |
| "ใใ & ใซใฉ \n", | |
| "ใ่ใใใใใจ & ใชใญใญใทใฟใคใ \n", | |
| "ๆใใใงใใใฉ & ใชใขใฆใณใในใฑใ \n", | |
| "0011 00019.775-00020.137 R:\n", | |
| "(F ใฏใ)(F ใฏใ) & (F (? ใ)ใค)(F ใใค) \n", | |
| "0012 00020.603-00020.823 R:\n", | |
| "(F ใใ) & (F <VN>) ...\n", | |
| "Saved: /content/interview_aps-smp.srt\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "\n", | |
| "/usr/local/lib/python3.12/dist-packages/stable_whisper/text_output.py:183: UserWarning: Result is missing word timestamps. Word-level timing cannot be exported. Use ``word_level=False`` to avoid this warning\n", | |
| " warnings.warn('Result is missing word timestamps. Word-level timing cannot be exported. '\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "\n", | |
| " async function download(id, filename, size) {\n", | |
| " if (!google.colab.kernel.accessAllowed) {\n", | |
| " return;\n", | |
| " }\n", | |
| " const div = document.createElement('div');\n", | |
| " const label = document.createElement('label');\n", | |
| " label.textContent = `Downloading \"${filename}\": `;\n", | |
| " div.appendChild(label);\n", | |
| " const progress = document.createElement('progress');\n", | |
| " progress.max = size;\n", | |
| " div.appendChild(progress);\n", | |
| " document.body.appendChild(div);\n", | |
| "\n", | |
| " const buffers = [];\n", | |
| " let downloaded = 0;\n", | |
| "\n", | |
| " const channel = await google.colab.kernel.comms.open(id);\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| "\n", | |
| " for await (const message of channel.messages) {\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| " if (message.buffers) {\n", | |
| " for (const buffer of message.buffers) {\n", | |
| " buffers.push(buffer);\n", | |
| " downloaded += buffer.byteLength;\n", | |
| " progress.value = downloaded;\n", | |
| " }\n", | |
| " }\n", | |
| " }\n", | |
| " const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
| " const a = document.createElement('a');\n", | |
| " a.href = window.URL.createObjectURL(blob);\n", | |
| " a.download = filename;\n", | |
| " div.appendChild(a);\n", | |
| " a.click();\n", | |
| " div.remove();\n", | |
| " }\n", | |
| " " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "download(\"download_d2e992ee-ba9d-402d-8c8d-c58b12a53443\", \"interview_aps-smp.srt\", 1417)" | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "๐พ ๅญๅนใใกใคใซ(interview_aps-smp.srt)ใใใฆใณใญใผใใใพใใใ\n" | |
| ] | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment