{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/Kazuki-tam/04e85708e4fd1c4b8af180d317977f4d/whisper-mock-en.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# 【Master】 whisper-mock\n",
        "Whisper is a general-purpose speech recognition model open-sourced by OpenAI.\n",
        "\n",
        "## 📖 How to use\n",
        "1. Run \"Setting up\".\n",
        "2. Open the folder icon from the left sidebar.\n",
        "3. Upload audio files into the `content`.\n",
        "4. Input the audio file name into `fileName`.\n",
        "5. Select output language.\n",
        "5. Run \"Transcription\"."
      ],
      "metadata": {
        "id": "zw5ButypVydc"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "XWIl4Ys54Ce6",
        "cellView": "form"
      },
      "outputs": [],
      "source": [
        "#@title Setting up\n",
        "# Install packages\n",
        "!pip install git+https://github.com/openai/whisper.git\n",
        "\n",
        "import os\n",
        "\n",
        "# Add folders\n",
        "checkContentFolder = os.path.exists(\"content\")\n",
        "checkDownLoadFolder = os.path.exists(\"download\")\n",
        "if not checkContentFolder:\n",
        "  os.mkdir(\"content\")\n",
        "if not checkDownLoadFolder:\n",
        "  os.mkdir(\"download\")"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Transcription\n",
        "import whisper\n",
        "\n",
        "fileName = \"sample.m4a\"#@param {type:\"string\"}\n",
        "lang = \"en\"#@param [\"en\", \"ja\"]\n",
        "model = whisper.load_model(\"base\")\n",
        "\n",
        "# Load audio\n",
        "audio = whisper.load_audio(f\"content/{fileName}\")\n",
        "audio = whisper.pad_or_trim(audio)\n",
        "\n",
        "mel = whisper.log_mel_spectrogram(audio).to(model.device)\n",
        "\n",
        "# Output the recognized text\n",
        "options = whisper.DecodingOptions(language=lang, without_timestamps=True)\n",
        "result = whisper.decode(model, mel, options)\n",
        "print(result.text)\n",
        "\n",
        "# Write into a text file\n",
        "with open(f\"download/{fileName}.txt\", \"w\") as f:\n",
        "  f.write(f\"▼ Transcription of {fileName}\\n\")\n",
        "  f.write(result.text)"
      ],
      "metadata": {
        "id": "scAiM8ug_s1M",
        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Download a transcription file\n",
        "from google.colab import files\n",
        "!zip -r download.zip download\n",
        "files.download(\"download.zip\")"
      ],
      "metadata": {
        "id": "fKEdUXyRrDIE",
        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}