Created
January 2, 2024 13:30
-
-
Save subaru-shoji/5d524af2cb5169bd4f3afb3dca217cc8 to your computer and use it in GitHub Desktop.
aivis-dataset_and_style-bert-vits2.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/subaru-shoji/5d524af2cb5169bd4f3afb3dca217cc8/aivis-dataset_and_style-bert-vits2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Parameters\n", | |
"url = \"\" # @param {type:\"string\"}\n", | |
"speaker_name = \"\" # @param {type:\"string\"}" | |
], | |
"metadata": { | |
"id": "51XXT-pJU1PE" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Aivis-Dataset" | |
], | |
"metadata": { | |
"id": "G4AeU_MbVNdg" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "s41Wwcti160H" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Install\n", | |
"%cd /content/\n", | |
"!git clone https://github.com/litagin02/Aivis-Dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "nkWn-1t_2j9N" | |
}, | |
"outputs": [], | |
"source": [ | |
"%cd /content/Aivis-Dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "t3DBSE862I51" | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip3 install -r requirements.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "-58bfXCI_wvm" | |
}, | |
"outputs": [], | |
"source": [ | |
"!apt update -y\n", | |
"!apt install -y yt-dlp\n", | |
"!apt install -y libcublas11" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "mjodfvk4DlOF" | |
}, | |
"outputs": [], | |
"source": [ | |
"!yt-dlp --update" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "0FE8anZV7807" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Download YouTube audio as a WAV file.\n", | |
"!yt-dlp -x --audio-format wav --audio-quality 0 -o \"01-Sources/{speaker_name}.%(ext)s\" \"{url}\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "BDAukvHUDcTn" | |
}, | |
"outputs": [], | |
"source": [ | |
"!python -m Aivis create-segments --trim-silence" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "pZMyDyAbF_rU" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Create dataset.\n", | |
"!python -m Aivis create-datasets ALL \"{speaker_name}\" --accept-all" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "pk6XuukYHVwa" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Download dataset.\n", | |
"!zip -r /content/{speaker_name}_dataset.zip /content/Aivis-Dataset/04-Datasets/{speaker_name}\n", | |
"\n", | |
"# 圧縮した zip ファイルをダウンロードする\n", | |
"from google.colab import files\n", | |
"files.download(f'/content/{speaker_name}_dataset.zip')\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Style-Bert-VITS2" | |
], | |
"metadata": { | |
"id": "jFjOsZoPVdbm" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "HDgwaX14KDlU" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Install\n", | |
"%cd /content/\n", | |
"!git clone https://github.com/litagin02/Style-Bert-VITS2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Kp5pXohqKPuN" | |
}, | |
"outputs": [], | |
"source": [ | |
"%cd /content/Style-Bert-VITS2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "SYB2-tgBKqvA" | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install -r requirements.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "mA5hqJV9LVGv" | |
}, | |
"outputs": [], | |
"source": [ | |
"!python initialize.py" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "tv2AlGBaKSG0" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Copy dataset from Aivis-Dataset.\n", | |
"!cp -r /content/Aivis-Dataset/04-Datasets/{speaker_name} /content/drive/MyDrive/Style-Bert-VITS2/Data/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Preprocess\n", | |
"\n", | |
"# 上でつけたフォルダの名前`Data/{model_name}/`\n", | |
"model_name = speaker_name\n", | |
"\n", | |
"# 学習のバッチサイズ。4ぐらいが最適か。VRAMのはみ出具合に応じて調整してください。\n", | |
"batch_size = 8\n", | |
"\n", | |
"# 学習のエポック数(データセットを合計何周するか)。100ぐらいで十分だと思われます。\n", | |
"epochs = 100\n", | |
"\n", | |
"# 保存頻度。何ステップごとにモデルを保存するか。分からなければデフォルトのままで。\n", | |
"save_every_steps = 1000\n", | |
"\n", | |
"# 音声ファイルの音量を正規化するかどうか。\n", | |
"normalize = True\n", | |
"\n", | |
"# 音声ファイルの開始・終了にある無音区間を削除するかどうか\n", | |
"trim = True\n", | |
"\n", | |
"from webui_train import preprocess_all\n", | |
"\n", | |
"preprocess_all(\n", | |
" model_name=model_name,\n", | |
" batch_size=batch_size,\n", | |
" epochs=epochs,\n", | |
" save_every_steps=save_every_steps,\n", | |
" bf16_run=False, # colabの無料のやつではサポートされていないようです。\n", | |
" num_processes=2,\n", | |
" normalize=normalize,\n", | |
" trim=trim,\n", | |
")" | |
], | |
"metadata": { | |
"id": "ObHd6HJf6gzZ" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Train\n", | |
"\n", | |
"# 上でつけたモデル名を入力。学習を途中からする場合はきちんとモデルが保存されているフォルダ名を入力。\n", | |
"model_name = speaker_name\n", | |
"\n", | |
"import yaml\n", | |
"from webui_train import get_path\n", | |
"\n", | |
"dataset_path, _, _, _, config_path = get_path(model_name)\n", | |
"with open(\"default_config.yml\", \"r\", encoding=\"utf-8\") as f:\n", | |
" yml_data = yaml.safe_load(f)\n", | |
"yml_data[\"model_name\"] = model_name\n", | |
"yml_data[\"dataset_path\"] = dataset_path\n", | |
"with open(\"config.yml\", \"w\", encoding=\"utf-8\") as f:\n", | |
" yaml.dump(yml_data, f, allow_unicode=True)\n", | |
"\n", | |
"\n", | |
"!python train_ms.py --config {config_path} --model {dataset_path}" | |
], | |
"metadata": { | |
"id": "tv_2HFZm8-5G" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "IR1LQwr2Q_Lo" | |
}, | |
"outputs": [], | |
"source": [ | |
"# @title Delete \"Data/{speaker_name}/models\" (only if you need to remove the models directory).\n", | |
"!rm -rf /content/drive/MyDrive/Style-Bert-VITS2/Data/{speaker_name}/models" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Download model.\n", | |
"\n", | |
"# 速度がめちゃくちゃ遅いので、直接Google Driveからダウンロードするのが早いっぽい\n", | |
"\n", | |
"!zip -r /content/{speaker_name}.zip /content/drive/MyDrive/Style-Bert-VITS2/model_assets/{speaker_name}\n", | |
"\n", | |
"# 圧縮した zip ファイルをダウンロードする\n", | |
"from google.colab import files\n", | |
"files.download(f'/content/{speaker_name}.zip')\n" | |
], | |
"metadata": { | |
"id": "XFduqSwECkf2" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Copy model from Google Drive to Style-Bert-VITS2.\n", | |
"!cp -r /content/drive/MyDrive/Style-Bert-VITS2/Data/{speaker_name} /content/Style-Bert-VITS2/Data/\n", | |
"!cp -r /content/drive/MyDrive/Style-Bert-VITS2/model_assets/{speaker_name}/ /content/Style-Bert-VITS2/model_assets/" | |
], | |
"metadata": { | |
"id": "NNMydAHOKQym" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Create styles.\n", | |
"\n", | |
"# 手動でshare=Trueを書き換える\n", | |
"!python webui_style_vectors.py" | |
], | |
"metadata": { | |
"id": "GByK16FHJJ6p" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# @title Launch the TTS web UI.\n", | |
"!python app.py --share --dir \"/content/Style-Bert-VITS2/model_assets/\"" | |
], | |
"metadata": { | |
"id": "zSuwv4ftFpX2" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"accelerator": "GPU", | |
"colab": { | |
"machine_shape": "hm", | |
"provenance": [], | |
"gpuType": "A100", | |
"mount_file_id": "1hwLdHMBzkwStYm1WL8V7lPmafyZf-e_s", | |
"authorship_tag": "ABX9TyP4aSgnR2nZHuXzqYRFS0ej", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"name": "python3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment