Last active
September 11, 2022 09:03
-
-
Save k-kinzal/6610482734cd9427e0aba73bbc734c3a to your computer and use it in GitHub Desktop.
Textual Inversion (diffusers v0.3, Stable Diffusion, Real-ESRGAN).ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/k-kinzal/6610482734cd9427e0aba73bbc734c3a/textual-inversion-diffusers-v3-stable-diffusion-real-esrgan.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "gKhD8mN7M1AS" | |
}, | |
"source": [ | |
"# Prepare\n", | |
"Colab Pro+で動作確認済み\n", | |
"\n", | |
"ランタイムのタイプ\n", | |
"- ハードウェアアクセラレータ: GPU\n", | |
"- ランタイムの仕様: ハイメモリ\n", | |
"- バックグラウンド実行あり" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "VAVrPKzLjohn", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title Google Driveをマウント\n", | |
"#@markdown /content/driveにGoogle Driveをマウントします\n", | |
"from google.colab import drive\n", | |
"drive.mount('/content/drive')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "slksfedNvAsA", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title ディレクトリを初期化\n", | |
"#@markdown 後続で利用するディレクトリを事前に作成します\n", | |
"!rm -rf /content/checkpoint /content/inputs /content/outputs /content/results\n", | |
"!mkdir -p /content/checkpoint /content/inputs /content/outputs /content/results" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ro2uQ33cMwp6" | |
}, | |
"source": [ | |
"# Textual Inversion\n", | |
"diffusersのtextual inversionを使って対象のstyle、またはobjectを学習します\n", | |
"https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "DupwVS0uukYV", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title 初期化\n", | |
"#@markdown diffusersのtextual inversionのexampleをダウンロードして初期化します\n", | |
"%cd /content\n", | |
"!rm -rf textual_inversion.py requirements.txt\n", | |
"!wget -O /content/textual_inversion.py https://raw.githubusercontent.com/huggingface/diffusers/main/examples/textual_inversion/textual_inversion.py\n", | |
"!wget -O /content/requirements.txt https://raw.githubusercontent.com/huggingface/diffusers/main/examples/textual_inversion/requirements.txt\n", | |
"!pip install -r requirements.txt\n", | |
"!pip install -qq diffusers[\"training\"]==0.3.0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title 学習に使うファイルをアップロード\n", | |
"#@markdown アップロードするファイルは事前に512x512のサイズにクリッピングしてください\n", | |
"#@markdown\n", | |
"#@markdown **※ もし上手くアップロードできない場合はブラウザの設定からサードパーティーのCookieを受け付けるように設定を変更してください**\n", | |
"%rm -rf /content/inputs/*\n", | |
"%cd /content/inputs\n", | |
"from google.colab import files\n", | |
"files.upload()" | |
], | |
"metadata": { | |
"cellView": "form", | |
"id": "UI0QsHS-KMa0" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "P7K3fNkWv7Lx", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"%%shell\n", | |
"\n", | |
"#@title Textual Inversionの実行\n", | |
"#@markdown See: [https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py](https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py)\n", | |
"cd /content/\n", | |
"\n", | |
"# Form parameters. Every value below is forwarded to textual_inversion.py, so\n", | |
"# each variable name must match the ${...} reference in the launch command.\n", | |
"HUGGINGFACE_API_KEY=\"[your api key]\" #@param {type:\"string\"}\n", | |
"pretrained_model_name_or_path=\"hakurei/waifu-diffusion\" #@param {type:\"string\"}\n", | |
"placeholder_token=\"<my-textual-inversion>\" #@param {type:\"string\"}\n", | |
"initializer_token=\"illustration\" #@param {type:\"string\"}\n", | |
"learnable_property=\"style\" #@param [\"object\", \"style\"] {allow-input: false}\n", | |
"repeats=100 #@param {type:\"number\"}\n", | |
"gradient_accumulation_steps=1 #@param {type:\"number\"}\n", | |
"max_train_steps=5000 #@param {type:\"number\"}\n", | |
"learning_rate=1e-4 #@param {type:\"number\"}\n", | |
"lr_scheduler=\"constant\" #@param [\"linear\", \"cosine\", \"cosine_with_restarts\", \"polynomial\", \"constant\", \"constant_with_warmup\"] {allow-input: false}\n", | |
"lr_warmup_steps=500 #@param {type:\"number\"}\n", | |
"# fp16 is the default precision for training in this notebook; pick no for full precision.\n", | |
"mixed_precision=\"fp16\" #@param [\"no\", \"fp16\", \"bf16\"] {allow-input: false}\n", | |
"output_dir=\"/content/drive/MyDrive/path/to/text-inversion-model\" #@param {type:\"string\"} \n", | |
"\n", | |
"# Training always starts from an empty output directory: anything already\n", | |
"# stored at output_dir is deleted before the run.\n", | |
"rm -rf \"${output_dir}\"\n", | |
"mkdir -p \"${output_dir}\"\n", | |
"\n", | |
"# Resolution and batch size are fixed; everything else comes from the form.\n", | |
"accelerate launch textual_inversion.py \\\n", | |
"  --hub_token=\"${HUGGINGFACE_API_KEY}\" \\\n", | |
"  --pretrained_model_name_or_path=\"${pretrained_model_name_or_path}\" \\\n", | |
"  --train_data_dir=\"/content/inputs\" \\\n", | |
"  --placeholder_token=\"${placeholder_token}\" \\\n", | |
"  --initializer_token=\"${initializer_token}\" \\\n", | |
"  --learnable_property=\"${learnable_property}\" \\\n", | |
"  --repeats=${repeats} \\\n", | |
"  --output_dir=\"${output_dir}\" \\\n", | |
"  --resolution=512 \\\n", | |
"  --train_batch_size=1 \\\n", | |
"  --gradient_accumulation_steps=${gradient_accumulation_steps} \\\n", | |
"  --max_train_steps=${max_train_steps} \\\n", | |
"  --learning_rate=${learning_rate} \\\n", | |
"  --scale_lr \\\n", | |
"  --lr_scheduler=\"${lr_scheduler}\" \\\n", | |
"  --lr_warmup_steps=${lr_warmup_steps} \\\n", | |
"  --mixed_precision=\"${mixed_precision}\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "LE72l9FWMpXv" | |
}, | |
"source": [ | |
"# Stable Diffusion (txt2img)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "x2Be8rSY_w9W", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title 初期化\n", | |
"import os\n", | |
"import torch\n", | |
"from diffusers import StableDiffusionPipeline, DDIMScheduler\n", | |
"from PIL import Image\n", | |
"\n", | |
"def image_grid(imgs, rows, cols):\n", | |
" assert len(imgs) == rows*cols\n", | |
"\n", | |
" w, h = imgs[0].size\n", | |
" grid = Image.new('RGB', size=(cols*w, rows*h))\n", | |
" grid_w, grid_h = grid.size\n", | |
" \n", | |
" for i, img in enumerate(imgs):\n", | |
" grid.paste(img, box=(i%cols*w, i//cols*h))\n", | |
" return grid\n", | |
"\n", | |
"model_name=\"/content/drive/MyDrive/path/to/text-inversion-model\" #@param {type:\"string\", description:\"aaa\"}\n", | |
"disable_safety_checker=True #@param {type:\"boolean\"}\n", | |
"\n", | |
"\n", | |
"pipe = StableDiffusionPipeline.from_pretrained(\n", | |
" model_name,\n", | |
" revision=\"fp16\", \n", | |
" torch_dtype=torch.float16,\n", | |
" scheduler=DDIMScheduler(\n", | |
" beta_start=0.00085,\n", | |
" beta_end=0.012,\n", | |
" beta_schedule=\"scaled_linear\",\n", | |
" clip_sample=False,\n", | |
" set_alpha_to_one=False,\n", | |
" )\n", | |
")\n", | |
"pipe.to(\"cuda\")\n", | |
"if disable_safety_checker == True:\n", | |
" def dummy(images, **kwargs): return images, False \n", | |
" pipe.safety_checker = dummy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "-RaHva9aA4zT", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title txt2imgの実行\n", | |
"#@markdown See: [https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L108-L150](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L108-L150)\n", | |
"!rm -rf /content/outputs/*\n", | |
"\n", | |
"import os\n", | |
"from torch import autocast\n", | |
"\n", | |
"prompt = \"\\u003Cmy-textual-inversion>\" #@param {type:\"string\"}\n", | |
"num_inference_steps=50 #@param {type:\"number\"}\n", | |
"guidance_scale=7.5 #@param {type:\"number\"}\n", | |
"seed = -1 #@param {type:\"number\"}\n", | |
"num_samples = 3 #@param {type:\"number\"}\n", | |
"num_rows = 3 #@param {type:\"number\"}\n", | |
"\n", | |
"generator = torch.Generator(\"cuda\")\n", | |
"if seed >= 0:\n", | |
" generator = generator.manual_seed(seed)\n", | |
"\n", | |
"all_images = [] \n", | |
"for i in range(num_rows):\n", | |
" with autocast(\"cuda\"):\n", | |
" images = pipe(\n", | |
" [prompt] * num_samples,\n", | |
" num_inference_steps=num_inference_steps,\n", | |
" guidance_scale=guidance_scale,\n", | |
" generator=generator,\n", | |
" ).images\n", | |
" for j, image in enumerate(images):\n", | |
" image.save(\"/content/outputs/{:03}.png\".format((i * num_samples) + (j + 1)))\n", | |
" all_images.extend(images)\n", | |
"\n", | |
"image_grid(all_images, num_samples, num_rows)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "9dd40IniM9D2" | |
}, | |
"source": [ | |
"# Real-ESRGAN\n", | |
"Stable Diffusionで出力した画像に対してReal-ESRGANで超解像化します" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "PB_JlnzdNB1D", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title 初期化\n", | |
"#@markdown Real-ESRGANをダウンロードして初期化します\n", | |
"%cd /content\n", | |
"# Clone Real-ESRGAN and enter the Real-ESRGAN\n", | |
"!git clone https://github.com/xinntao/Real-ESRGAN.git\n", | |
"%cd /content/Real-ESRGAN\n", | |
"# Set up the environment\n", | |
"!pip install basicsr\n", | |
"!pip install facexlib\n", | |
"!pip install gfpgan\n", | |
"!pip install -r requirements.txt\n", | |
"!python setup.py develop\n", | |
"# Download the pre-trained model\n", | |
"!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "8c31I5mMR7Pr", | |
"cellView": "form" | |
}, | |
"outputs": [], | |
"source": [ | |
"#@title Real-ESRGANを実行\n", | |
"%rm -rf /content/results/*\n", | |
"%cd /content/Real-ESRGAN\n", | |
"\n", | |
"\n", | |
"num_samples = 3 #@param {type:\"number\"}\n", | |
"num_rows = 3 #@param {type:\"number\"}\n", | |
"\n", | |
"# if it is out of memory, try to use the `--tile` option\n", | |
"# We upsample the image with the scale factor X3.5\n", | |
"!python inference_realesrgan.py -n RealESRGAN_x4plus -i /content/outputs -o /content/results --outscale 3.5 --face_enhance\n", | |
"# Arguments\n", | |
"# -n, --model_name: Model names\n", | |
"# -i, --input: input folder or image\n", | |
"# --outscale: Output scale, can be arbitrary scale factore. \n", | |
"\n", | |
"import glob\n", | |
"from PIL import Image\n", | |
"\n", | |
"def image_grid(imgs, rows, cols):\n", | |
" assert len(imgs) == rows*cols\n", | |
"\n", | |
" w, h = imgs[0].size\n", | |
" grid = Image.new('RGB', size=(cols*w, rows*h))\n", | |
" grid_w, grid_h = grid.size\n", | |
" \n", | |
" for i, img in enumerate(imgs):\n", | |
" grid.paste(img, box=(i%cols*w, i//cols*h))\n", | |
" return grid\n", | |
"\n", | |
"all_images = []\n", | |
"for path in sorted(glob.glob(os.path.join(\"/content/results\", \"*\"))):\n", | |
" image = Image.open(path)\n", | |
" all_images.extend([image.resize((512, 512))])\n", | |
"\n", | |
"image_grid(all_images, num_samples, num_rows)" | |
] | |
} | |
], | |
"metadata": { | |
"accelerator": "GPU", | |
"colab": { | |
"background_execution": "on", | |
"collapsed_sections": [], | |
"machine_shape": "hm", | |
"provenance": [], | |
"private_outputs": true, | |
"toc_visible": true, | |
"authorship_tag": "ABX9TyNy6UjCGLbMNNZXgErSxh6B", | |
"include_colab_link": true | |
}, | |
"gpuClass": "standard", | |
"kernelspec": { | |
"display_name": "Python 3", | |
"name": "python3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment