{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "<a href=\"https://colab.research.google.com/gist/Kazuki-tam/04e85708e4fd1c4b8af180d317977f4d/whisper-mock-en.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { "cell_type": "markdown", "source": [ "# 【Master】 whisper-mock\n", "Whisper is a general-purpose speech recognition model open-sourced by OpenAI.\n", "\n", "## 📖 How to use\n", "1. Run \"Setting up\".\n", "2. Click the folder icon in the left sidebar.\n", "3. Upload audio files into the `content` folder.\n", "4. Enter the audio file name in `fileName`.\n", "5. Select the language spoken in the audio with `lang`.\n", "6. Run \"Transcription\"." 
], "metadata": { "id": "zw5ButypVydc" } },
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": { "id": "XWIl4Ys54Ce6", "cellView": "form" },
 "outputs": [],
 "source": [
  "#@title Setting up\n",
  "# Install Whisper; %pip (unlike !pip) always targets the running kernel's environment\n",
  "%pip install git+https://github.com/openai/whisper.git\n",
  "\n",
  "import os\n",
  "\n",
  "# Add folders: `content` receives the uploaded audio, `download` collects the transcriptions.\n",
  "# exist_ok=True keeps this cell safe to re-run.\n",
  "for folder in (\"content\", \"download\"):\n",
  "    os.makedirs(folder, exist_ok=True)"
 ]
},
{
 "cell_type": "code",
 "source": [
  "#@title Transcription\n",
  "import os\n",
  "\n",
  "import whisper\n",
  "\n",
  "fileName = \"sample.m4a\"#@param {type:\"string\"}\n",
  "lang = \"en\"#@param [\"en\", \"ja\"]\n",
  "\n",
  "# Fail early with a readable message instead of an opaque ffmpeg error\n",
  "audioPath = f\"content/{fileName}\"\n",
  "if not os.path.isfile(audioPath):\n",
  "    raise FileNotFoundError(\n",
  "        f\"Audio file not found: {audioPath} (upload it into the `content` folder and check `fileName`)\"\n",
  "    )\n",
  "\n",
  "model = whisper.load_model(\"base\")\n",
  "\n",
  "# transcribe() processes the whole file in consecutive 30-second windows;\n",
  "# decoding a single pad_or_trim'd window would drop everything after the first 30 seconds.\n",
  "# `lang` is the language spoken in the audio.\n",
  "result = model.transcribe(audioPath, language=lang)\n",
  "text = result[\"text\"].strip()\n",
  "\n",
  "# Output the recognized text\n",
  "print(text)\n",
  "\n",
  "# Write into a text file; explicit UTF-8 because the header mark and Japanese text are non-ASCII\n",
  "with open(f\"download/{fileName}.txt\", \"w\", encoding=\"utf-8\") as f:\n",
  "    f.write(f\"▼ Transcription of {fileName}\\n\")\n",
  "    f.write(text)"
 ],
 "metadata": { "id": "scAiM8ug_s1M", "cellView": "form" },
 "execution_count": null,
 "outputs": []
},
{
 "cell_type": "code",
 "source": [
  "#@title Download a transcription file\n",
  "import shutil\n",
  "\n",
  "from google.colab import files\n",
  "\n",
  "# Rebuild download.zip from scratch on every run: `zip -r` would update an existing\n",
  "# archive and keep entries for files that were since removed from `download`.\n",
  "shutil.make_archive(\"download\", \"zip\", root_dir=\".\", base_dir=\"download\")\n",
  "files.download(\"download.zip\")"
 ],
 "metadata": { "id": "fKEdUXyRrDIE", "cellView": "form" },
 "execution_count": null,
 "outputs": []
}
] }