-
-
Save glowinthedark/e30fd836875aa07dce1da9d37ea0c5f9 to your computer and use it in GitHub Desktop.
whisper-mock-en.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/Kazuki-tam/04e85708e4fd1c4b8af180d317977f4d/whisper-mock-en.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# 【Master】 whisper-mock\n", | |
"Whisper is a general-purpose speech recognition model open-sourced by OpenAI.\n", | |
"\n", | |
"## 📖 How to use\n", | |
"1. Run \"Setting up\".\n", | |
"2. Open the folder icon from the left sidebar.\n", | |
"3. Upload audio files into the `content`.\n", | |
"4. Input the audio file name into `fileName`.\n", | |
"5. Select output language.\n", | |
"5. Run \"Transcription\"." | |
], | |
"metadata": { | |
"id": "zw5ButypVydc" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "XWIl4Ys54Ce6", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title Setting up\n", | |
"# Install packages\n", | |
"!pip install git+https://github.com/openai/whisper.git\n", | |
"\n", | |
"import os\n", | |
"\n", | |
"# Add folders\n", | |
"checkContentFolder = os.path.exists(\"content\")\n", | |
"checkDownLoadFolder = os.path.exists(\"download\")\n", | |
"if not checkContentFolder:\n", | |
" os.mkdir(\"content\")\n", | |
"if not checkDownLoadFolder:\n", | |
" os.mkdir(\"download\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title Transcription\n", | |
"import whisper\n", | |
"\n", | |
"fileName = \"sample.m4a\"#@param {type:\"string\"}\n", | |
"lang = \"en\"#@param [\"en\", \"ja\"]\n", | |
"model = whisper.load_model(\"base\")\n", | |
"\n", | |
"# Load audio\n", | |
"audio = whisper.load_audio(f\"content/{fileName}\")\n", | |
"audio = whisper.pad_or_trim(audio)\n", | |
"\n", | |
"mel = whisper.log_mel_spectrogram(audio).to(model.device)\n", | |
"\n", | |
"# Output the recognized text\n", | |
"options = whisper.DecodingOptions(language=lang, without_timestamps=True)\n", | |
"result = whisper.decode(model, mel, options)\n", | |
"print(result.text)\n", | |
"\n", | |
"# Write into a text file\n", | |
"with open(f\"download/{fileName}.txt\", \"w\") as f:\n", | |
" f.write(f\"▼ Transcription of {fileName}\\n\")\n", | |
" f.write(result.text)" | |
], | |
"metadata": { | |
"id": "scAiM8ug_s1M", | |
"cellView": "form" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title Download a transcription file\n", | |
"from google.colab import files\n", | |
"!zip -r download.zip download\n", | |
"files.download(\"download.zip\")" | |
], | |
"metadata": { | |
"id": "fKEdUXyRrDIE", | |
"cellView": "form" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment