Skip to content

Instantly share code, notes, and snippets.

@phu54321
Created June 24, 2024 10:43
Show Gist options
  • Save phu54321/ca8a957ad41f58cded34823fca1f2afc to your computer and use it in GitHub Desktop.
Save phu54321/ca8a957ad41f58cded34823fca1f2afc to your computer and use it in GitHub Desktop.
Faster Whisper Colab Runner.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyM4vtPG2FtEpuoNrjcGGBMr",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"1176f9c23d5a475b894c01998ce80114": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_17fcf507e79542f197706d006cc70cfc",
"max": 19.0635625,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b2845afe89344d0da49b349a2b95d355",
"value": 19.0635625
}
},
"17fcf507e79542f197706d006cc70cfc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b2845afe89344d0da49b349a2b95d355": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/phu54321/ca8a957ad41f58cded34823fca1f2afc/faster-whisper-colab-runner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "vO9qJvQI9iZS",
"outputId": "59bd4943-7790-475e-bcbd-63f399ffcbc7",
"cellView": "form"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting faster-whisper\n",
" Downloading faster_whisper-1.0.2-py3-none-any.whl (1.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting yt-dlp\n",
" Downloading yt_dlp-2024.5.27-py3-none-any.whl (3.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting av<13,>=11.0 (from faster-whisper)\n",
" Downloading av-12.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.3/34.3 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting ctranslate2<5,>=4.0 (from faster-whisper)\n",
" Downloading ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (192.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.3/192.3 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: huggingface-hub>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.23.4)\n",
"Requirement already satisfied: tokenizers<1,>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.19.1)\n",
"Collecting onnxruntime<2,>=1.14 (from faster-whisper)\n",
" Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting brotli (from yt-dlp)\n",
" Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.0 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2024.6.2)\n",
"Collecting mutagen (from yt-dlp)\n",
" Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.4/194.4 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting pycryptodomex (from yt-dlp)\n",
" Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: requests<3,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.31.0)\n",
"Requirement already satisfied: urllib3<3,>=1.26.17 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.0.7)\n",
"Collecting websockets>=12.0 (from yt-dlp)\n",
" Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (67.7.2)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (1.25.2)\n",
"Requirement already satisfied: pyyaml<7,>=5.3 in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (6.0.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (3.15.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (2023.6.0)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (24.1)\n",
"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.66.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.12.2)\n",
"Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)\n",
" Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (24.3.25)\n",
"Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (1.12.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.7)\n",
"Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)\n",
" Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<2,>=1.14->faster-whisper) (1.3.0)\n",
"Installing collected packages: brotli, websockets, pycryptodomex, mutagen, humanfriendly, ctranslate2, av, yt-dlp, coloredlogs, onnxruntime, faster-whisper\n",
"Successfully installed av-12.1.0 brotli-1.1.0 coloredlogs-15.0.1 ctranslate2-4.3.1 faster-whisper-1.0.2 humanfriendly-10.0 mutagen-1.47.0 onnxruntime-1.18.0 pycryptodomex-3.20.0 websockets-12.0 yt-dlp-2024.5.27\n"
]
}
],
"source": [
"#@title Install dependencies\n",
"# %pip (instead of !pip) installs into the environment of the running kernel.\n",
"%pip install faster-whisper yt-dlp"
]
},
{
"cell_type": "code",
"source": [
"#@title Transcribe audio to .srt file\n",
"\n",
"from ipywidgets import FloatProgress\n",
"from datetime import datetime\n",
"import os\n",
"from faster_whisper import WhisperModel\n",
"\n",
"# Load the Whisper checkpoint once; transcribe() below reuses this module-level instance.\n",
"model = WhisperModel(\n",
"    \"medium\",\n",
"    device=\"cuda\",\n",
"    compute_type=\"float16\",\n",
")\n",
"\n",
"# Directory that receives the generated .srt files (created on first run).\n",
"srtOutputDir = \"outputs\"\n",
"os.makedirs(srtOutputDir, exist_ok=True)\n",
"\n",
"def timeformat_srt(time):\n",
"    \"\"\"Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.\n",
"\n",
"    Args:\n",
"        time: Offset in seconds (int or float). Negative values are clamped to 0.\n",
"\n",
"    Returns:\n",
"        The timestamp string, e.g. ``01:02:03,500`` for ``3723.5``.\n",
"    \"\"\"\n",
"    # Work in whole milliseconds. Splitting the float directly and truncating\n",
"    # turns e.g. 1.001 into ',000' because of binary rounding error.\n",
"    total_ms = max(0, int(round(time * 1000)))\n",
"    total_seconds, milliseconds = divmod(total_ms, 1000)\n",
"    total_minutes, seconds = divmod(total_seconds, 60)\n",
"    hours, minutes = divmod(total_minutes, 60)\n",
"    return f\"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}\"\n",
"\n",
"def transcribe(filename, language=None):\n",
"    \"\"\"Transcribe an audio file and save the result as a SubRip (.srt) file.\n",
"\n",
"    Each segment is printed as it is consumed and a progress bar follows the\n",
"    end time of the latest segment.\n",
"\n",
"    Args:\n",
"        filename: Path of the audio file to transcribe.\n",
"        language: Forwarded unchanged to ``model.transcribe`` (default None).\n",
"\n",
"    Returns:\n",
"        A tuple ``(srtPath, srtOutput)``: the path of the file written inside\n",
"        ``srtOutputDir`` and the full subtitle text.\n",
"    \"\"\"\n",
"    fileBasename = os.path.basename(filename)\n",
"\n",
"    segments, info = model.transcribe(filename, beam_size=5, language=language)\n",
"\n",
"    pbar = FloatProgress(min=0, max=info.duration)\n",
"    display(pbar)\n",
"\n",
"    srtOutputChunks = []\n",
"    # SRT cue numbers are 1-based.\n",
"    for index, segment in enumerate(segments, start=1):\n",
"        text = segment.text.strip()\n",
"        srtOutputChunks.append(\n",
"            f\"{index}\\n{timeformat_srt(segment.start)} --> {timeformat_srt(segment.end)}\\n{text}\\n\"\n",
"        )\n",
"        print(f\"[{timeformat_srt(segment.start)}] {text}\")\n",
"        if segment.end is not None:\n",
"            pbar.value = segment.end\n",
"\n",
"    # Make sure the bar ends full even if the last segment stops early.\n",
"    pbar.value = info.duration\n",
"\n",
"    base, _ = os.path.splitext(fileBasename)\n",
"    # Hours, minutes, seconds: %H%M%S (the minutes field must be %M).\n",
"    timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
"    srtFilename = '%s_%s.srt' % (base, timestamp)\n",
"    srtPath = os.path.join(srtOutputDir, srtFilename)\n",
"    srtOutput = \"\\n\".join(srtOutputChunks)\n",
"    # Write UTF-8 explicitly so non-ASCII transcripts do not depend on the locale.\n",
"    with open(srtPath, 'w', encoding='utf-8') as wf:\n",
"        wf.write(srtOutput)\n",
"\n",
"    return srtPath, srtOutput\n"
],
"metadata": {
"id": "GJkXP5i6-Zy-",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title Get audio with yt-dlp\n",
"\n",
"import yt_dlp\n",
"\n",
"def getYoutubeAudio(url):\n",
"    \"\"\"Download the audio of a video with yt-dlp and return the file path.\n",
"\n",
"    Args:\n",
"        url: URL of the video to download.\n",
"\n",
"    Returns:\n",
"        Path of the downloaded audio file, or None if yt-dlp reported no file.\n",
"    \"\"\"\n",
"    downloaded_filename = None\n",
"\n",
"    def yt_dlp_monitor(d):\n",
"        # Fallback only: this hook fires when the download itself finishes,\n",
"        # before the FFmpegExtractAudio postprocessor may convert the file.\n",
"        nonlocal downloaded_filename\n",
"        if d['status'] == 'finished':\n",
"            info_dict = d.get('info_dict') or {}\n",
"            downloaded_filename = info_dict.get('_filename') or d.get('filename')\n",
"\n",
"    ydl_opts = {\n",
"        'format': 'm4a/bestaudio/best',\n",
"        'progress_hooks': [yt_dlp_monitor],\n",
"        'postprocessors': [{  # Extract audio using ffmpeg\n",
"            'key': 'FFmpegExtractAudio',\n",
"            'preferredcodec': 'm4a',\n",
"        }]\n",
"    }\n",
"\n",
"    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
"        info = ydl.extract_info(url, download=True)\n",
"\n",
"    # Prefer the path recorded after post-processing; it stays correct when\n",
"    # the audio had to be converted to m4a and the source file was replaced.\n",
"    for download in (info or {}).get('requested_downloads') or []:\n",
"        if download.get('filepath'):\n",
"            return download['filepath']\n",
"\n",
"    return downloaded_filename\n"
],
"metadata": {
"id": "jj5kP8q8w8k0",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from google.colab import files\n",
"\n",
"url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw' # @param {type:\"string\"}\n",
"fname = getYoutubeAudio(url)\n",
"# getYoutubeAudio returns None when no file was reported; fail with a clear message.\n",
"if fname is None:\n",
"    raise RuntimeError(f\"yt-dlp did not report a downloaded file for {url}\")\n",
"srtPath, srtOutput = transcribe(fname)\n",
"files.download(srtPath)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 281,
"referenced_widgets": [
"1176f9c23d5a475b894c01998ce80114",
"17fcf507e79542f197706d006cc70cfc",
"b2845afe89344d0da49b349a2b95d355"
]
},
"id": "jeeA9cGnAlSD",
"outputId": "e2bac65b-967b-42f3-fa96-b375f24c6fe4"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[youtube] Extracting URL: https://www.youtube.com/watch?v=jNQXAC9IVRw\n",
"[youtube] jNQXAC9IVRw: Downloading webpage\n",
"[youtube] jNQXAC9IVRw: Downloading ios player API JSON\n",
"[youtube] jNQXAC9IVRw: Downloading m3u8 information\n",
"[info] jNQXAC9IVRw: Downloading 1 format(s): 140\n",
"[download] Destination: Me at the zoo [jNQXAC9IVRw].m4a\n",
"[download] 100% of 301.95KiB in 00:00:00 at 7.80MiB/s \n",
"[FixupM4a] Correcting container of \"Me at the zoo [jNQXAC9IVRw].m4a\"\n",
"[ExtractAudio] Not converting audio Me at the zoo [jNQXAC9IVRw].m4a; file is already in target format m4a\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"FloatProgress(value=0.0, max=19.0635625)"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "1176f9c23d5a475b894c01998ce80114"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"[00:00:00,000] Alright, so here we are in front of the elephants.\n",
"[00:00:05,000] The cool thing about these guys is that they have really, really, really long trunks.\n",
"[00:00:13,000] And that's cool.\n",
"[00:00:16,000] And that's pretty much all there is to say.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.Javascript object>"
],
"application/javascript": [
"\n",
" async function download(id, filename, size) {\n",
" if (!google.colab.kernel.accessAllowed) {\n",
" return;\n",
" }\n",
" const div = document.createElement('div');\n",
" const label = document.createElement('label');\n",
" label.textContent = `Downloading \"${filename}\": `;\n",
" div.appendChild(label);\n",
" const progress = document.createElement('progress');\n",
" progress.max = size;\n",
" div.appendChild(progress);\n",
" document.body.appendChild(div);\n",
"\n",
" const buffers = [];\n",
" let downloaded = 0;\n",
"\n",
" const channel = await google.colab.kernel.comms.open(id);\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
"\n",
" for await (const message of channel.messages) {\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
" if (message.buffers) {\n",
" for (const buffer of message.buffers) {\n",
" buffers.push(buffer);\n",
" downloaded += buffer.byteLength;\n",
" progress.value = downloaded;\n",
" }\n",
" }\n",
" }\n",
" const blob = new Blob(buffers, {type: 'application/binary'});\n",
" const a = document.createElement('a');\n",
" a.href = window.URL.createObjectURL(blob);\n",
" a.download = filename;\n",
" div.appendChild(a);\n",
" a.click();\n",
" div.remove();\n",
" }\n",
" "
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.Javascript object>"
],
"application/javascript": [
"download(\"download_09704e79-c4e4-4ade-a34c-b46457c6d60a\", \"Me at the zoo [jNQXAC9IVRw]_20240624_104242.srt\", 329)"
]
},
"metadata": {}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment