{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Img2img8_22_V3_jp.ipynb",
"provenance": [],
"collapsed_sections": [
"6flZkpuzHIsj"
],
"machine_shape": "hm",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/thx-pw/6b1b82891b0a4da8ff0c6e6fd97c9978/img2img8_22_v3_jp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# このノートブックはredditの[Stable Diffusion Img2Img Google Collab Setup Guide](https://www.reddit.com/r/StableDiffusion/comments/wv2msc/stable_diffusion_img2img_google_collab_setup_guide/)をdeeplで日本語に翻訳したものです。"
],
"metadata": {
"id": "BLwAgn3k6hAq"
}
},
{
"cell_type": "markdown",
"source": [
"ウェイトをダウンロードし、**あなたのベースグーグルドライバフォルダ内**に置いてください(404エラーがある場合はアカウントを作成し、あなたが未承認であると表示された場合はライセンス契約を承諾してください): https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- または、こちらからダウンロードしてください https://drive.google.com/file/d/1kPURA5ErV9ohu872LEOIrsiElpoGb9ww/view \n",
"オリジナルコラボはu/Najbox、修正はu/cR_SpitfireまたはDiscordのThicc Birb#1468によるものです。"
],
"metadata": {
"id": "wr0H6s5umpnT"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "slEHb8V2FpWE",
"collapsed": true,
"cellView": "form"
},
"outputs": [],
"source": [
"#@title セットアップをしよう\n",
"!nvidia-smi\n",
"#@title セットアップ\n",
"!git clone https://github.com/CompVis/stable-diffusion.git\n",
"!pip install omegaconf einops pytorch-lightning transformers kornia -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers -e git+https://github.com/openai/CLIP.git@main#egg=clip\n",
"\n",
"\n",
"from IPython.display import clear_output \n",
"clear_output()\n",
"\n",
"\n",
"#@title ドライブからモデルをコピーする\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"!mkdir -p /content/stable-diffusion/models/ldm/stable-diffusion-v1/\n",
"!cp -r /content/drive/MyDrive/sd-v1-4.ckpt /content/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt\n",
"\n",
"#@title リロード\n",
"import os\n",
"os._exit(00)\n",
"\n",
"#このセルを実行すると、ノートブックがリロードされますが、これは正常です。"
]
},
{
"cell_type": "markdown",
"source": [
"# 環境は再読み込みされました、エラーは気にしないでください! これは意図的なものです。以下、最後のセットアップです。"
],
"metadata": {
"id": "6flZkpuzHIsj"
}
},
{
"cell_type": "code",
"source": [
"#@title 最終セットアップ!\n",
"%cd stable-diffusion\n",
"!mkdir ImageC\n",
"from IPython.display import clear_output \n",
"clear_output()\n",
"#@title ライブラリの取り込みと関数の定義\n",
"\"\"\"make variations of input image\"\"\"\n",
"\n",
"import os\n",
"import PIL\n",
"import torch\n",
"import numpy as np\n",
"from omegaconf import OmegaConf\n",
"from PIL import Image\n",
"from tqdm import tqdm, trange\n",
"from itertools import islice\n",
"from einops import rearrange, repeat\n",
"from torchvision.utils import make_grid\n",
"from torch import autocast\n",
"from contextlib import nullcontext\n",
"import time\n",
"from pytorch_lightning import seed_everything\n",
"\n",
"from ldm.util import instantiate_from_config\n",
"from ldm.models.diffusion.ddim import DDIMSampler\n",
"from ldm.models.diffusion.plms import PLMSSampler\n",
"\n",
"\n",
"def chunk(it, size):\n",
" it = iter(it)\n",
" return iter(lambda: tuple(islice(it, size)), ())\n",
"\n",
"\n",
"def load_model_from_config(config_path = \"configs/stable-diffusion/v1-inference.yaml\", ckpt = \"models/ldm/stable-diffusion-v1/model.ckpt\", verbose=False):\n",
" print(f\"Loading model from {ckpt}\")\n",
" pl_sd = torch.load(ckpt, map_location=\"cuda\")\n",
" if \"global_step\" in pl_sd:\n",
" print(f\"Global Step: {pl_sd['global_step']}\")\n",
" sd = pl_sd[\"state_dict\"]\n",
"\n",
" config = OmegaConf.load(config_path)\n",
" model = instantiate_from_config(config.model)\n",
" m, u = model.load_state_dict(sd, strict=False)\n",
" if len(m) > 0 and verbose:\n",
" print(\"missing keys:\")\n",
" print(m)\n",
" if len(u) > 0 and verbose:\n",
" print(\"unexpected keys:\")\n",
" print(u)\n",
"\n",
" model.cuda()\n",
" model.eval()\n",
" return model\n",
"\n",
"\n",
"def load_img(path):\n",
" image = Image.open(path).convert(\"RGB\")\n",
" w, h = image.size\n",
" print(f\"loaded input image of size ({w}, {h}) from {path}\")\n",
" w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32\n",
" image = image.resize((w, h), resample=PIL.Image.LANCZOS)\n",
" image = np.array(image).astype(np.float32) / 255.0\n",
" image = image[None].transpose(0, 3, 1, 2)\n",
" image = torch.from_numpy(image)\n",
" return 2.*image - 1.\n",
"\n",
"\n",
"def image2image(prompt, plms = True, outdir = \"/content/output\", n_samples = 3, n_rows = 0, skip_save = False, skip_grid = False, ddim_steps = 50, from_file = None, fixed_code = False, strength = 0.75, init_img = \"/content/stable-diffusion/assets/stable-samples/img2img/sketch-mountains-input.jpg\", C = 4, H = 512, W = 512, f = 8, precision = \"full\", n_iter = 2, seed = 1610684295, scale = 7.5, ddim_eta = 0):\n",
"\n",
"\n",
" if plms:\n",
" sampler = PLMSSampler(model)\n",
" else:\n",
" sampler = DDIMSampler(model)\n",
"\n",
" os.makedirs(outdir, exist_ok=True)\n",
" outpath = outdir\n",
"\n",
" batch_size = n_samples\n",
" n_rows = n_rows if n_rows > 0 else batch_size\n",
" if not from_file:\n",
" prompt = prompt\n",
" assert prompt is not None\n",
" data = [batch_size * [prompt]]\n",
"\n",
" else:\n",
" print(f\"reading prompts from {from_file}\")\n",
" with open(from_file, \"r\") as f:\n",
" data = f.read().splitlines()\n",
" data = list(chunk(data, batch_size))\n",
"\n",
" sample_path = os.path.join(outpath, \"samples\")\n",
" os.makedirs(sample_path, exist_ok=True)\n",
" base_count = len(os.listdir(sample_path))\n",
" grid_count = len(os.listdir(outpath)) - 1\n",
"\n",
" assert os.path.isfile(init_img)\n",
" init_image = load_img(init_img).to(device)\n",
" init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)\n",
" init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image)) # move to latent space\n",
"\n",
" sampler.make_schedule(ddim_num_steps=ddim_steps, ddim_eta=ddim_eta, verbose=False)\n",
"\n",
" assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'\n",
" t_enc = int(strength * ddim_steps)\n",
" print(f\"target t_enc is {t_enc} steps\")\n",
"\n",
" precision_scope = autocast if precision == \"autocast\" else nullcontext\n",
" with torch.no_grad():\n",
" with precision_scope(\"cuda\"):\n",
" with model.ema_scope():\n",
" tic = time.time()\n",
" all_samples = list()\n",
" for n in trange(n_iter, desc=\"Sampling\"):\n",
" for prompts in tqdm(data, desc=\"data\"):\n",
" uc = None\n",
" if scale != 1.0:\n",
" uc = model.get_learned_conditioning(batch_size * [\"\"])\n",
" if isinstance(prompts, tuple):\n",
" prompts = list(prompts)\n",
" c = model.get_learned_conditioning(prompts)\n",
"\n",
" # encode (scaled latent)\n",
" z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(device))\n",
" # decode it\n",
" samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=scale,\n",
" unconditional_conditioning=uc,)\n",
"\n",
" x_samples = model.decode_first_stage(samples)\n",
" x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)\n",
"\n",
" if not skip_save:\n",
" for x_sample in x_samples:\n",
" x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')\n",
" Image.fromarray(x_sample.astype(np.uint8)).save(\n",
" os.path.join(sample_path, f\"{base_count:05}.png\"))\n",
" base_count += 1\n",
" all_samples.append(x_samples)\n",
"\n",
" if not skip_grid:\n",
" # additionally, save as grid\n",
" grid = torch.stack(all_samples, 0)\n",
" grid = rearrange(grid, 'n b c h w -> (n b) c h w')\n",
" grid = make_grid(grid, nrow=n_rows)\n",
"\n",
" # to image\n",
" grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()\n",
" Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'grid-{grid_count:04}.png'))\n",
" grid_count += 1\n",
"\n",
" toc = time.time()\n",
"\n",
" print(f\"Your samples are ready and waiting for you here: \\n{outpath} \\n\"\n",
" f\" \\nEnjoy.\")\n",
" return grid_count\n",
"\n",
"#@title ローディングモデル\n",
"model = load_model_from_config()\n",
"\n",
"device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
"\n",
"torch.cuda.empty_cache()\n",
"\n",
"from IPython.display import clear_output \n",
"clear_output()\n",
"from google.colab import output\n",
"output.enable_custom_widget_manager()\n",
"from google.colab import output\n",
"output.disable_custom_widget_manager()"
],
"metadata": {
"id": "VHJx5wFGGAJs",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# すべての設定が完了しましたので、ここから以下のセルを使って画像を生成してください。"
],
"metadata": {
"id": "s8-4qmjyHMaz"
}
},
{
"cell_type": "code",
"source": [
"#@title <--- 画像のアップロード\n",
"import os\n",
"from google.colab import files\n",
"\n",
"uploaded = files.upload() # colabアップロードダイアログを使用します。\n",
"uploaded = list(uploaded.keys()) # アップロードされたファイル名を取得します。\n",
"assert len(uploaded) == 1 # アップロードするファイルは1つだけにしてください。\n",
"os.rename(uploaded[0], 'image.png') \n",
"\n",
"import cv2\n",
"import numpy as np\n",
"\n",
"def resizeAndPad(img, size, padColor=0):\n",
"\n",
" h, w = img.shape[:2]\n",
" sh, sw = size\n",
"\n",
" # 補間\n",
" if h > sh or w > sw: # shrinking image\n",
" interp = cv2.INTER_AREA\n",
" else: # stretching image\n",
" interp = cv2.INTER_CUBIC\n",
"\n",
" # 画像の縦横比\n",
" aspect = w/h # Python 2 の場合、float(w)/h としてキャストする必要があるかもしれません。\n",
"\n",
" # スケーリングとパッドサイジングを計算します。\n",
" if aspect > 1: # horizontal image\n",
" new_w = sw\n",
" new_h = np.round(new_w/aspect).astype(int)\n",
" pad_vert = (sh-new_h)/2\n",
" pad_top, pad_bot = np.floor(pad_vert).astype(int), np.ceil(pad_vert).astype(int)\n",
" pad_left, pad_right = 0, 0\n",
" elif aspect < 1: # vertical image\n",
" new_h = sh\n",
" new_w = np.round(new_h*aspect).astype(int)\n",
" pad_horz = (sw-new_w)/2\n",
" pad_left, pad_right = np.floor(pad_horz).astype(int), np.ceil(pad_horz).astype(int)\n",
" pad_top, pad_bot = 0, 0\n",
" else: # square image\n",
" new_h, new_w = sh, sw\n",
" pad_left, pad_right, pad_top, pad_bot = 0, 0, 0, 0\n",
"\n",
" # パッドカラーを設定する\n",
" if len(img.shape) is 3 and not isinstance(padColor, (list, tuple, np.ndarray)): # color image but only one color provided\n",
" padColor = [padColor]*3\n",
"\n",
" # スケールとパッド\n",
" scaled_img = cv2.resize(img, (new_w, new_h), interpolation=interp)\n",
" scaled_img = cv2.copyMakeBorder(scaled_img, pad_top, pad_bot, pad_left, pad_right, borderType=cv2.BORDER_CONSTANT, value=padColor)\n",
"\n",
" return scaled_img\n",
"\n",
"#@markdown Tesla P100 Max = 704 and Tesla T4 Max = 640\n",
"maxsize = 512 #@param [\"512\", \"640\", \"704\"] {type:\"raw\"}\n",
"\n",
"v_img = cv2.imread('/content/stable-diffusion/image.png') # vertical image\n",
"scaled_v_img = resizeAndPad(v_img, (maxsize,maxsize), 127)\n",
"\n",
"h_img = cv2.imread('/content/stable-diffusion/image.png') # horizontal image\n",
"scaled_h_img = resizeAndPad(h_img, (maxsize,maxsize), 127)\n",
"\n",
"sq_img = cv2.imread('/content/stable-diffusion/image.png') # square image\n",
"scaled_sq_img = resizeAndPad(sq_img, (maxsize,maxsize), 127)\n",
"\n",
"\n",
"Horizontal_Picture = scaled_h_img\n",
"Vertical_Picture = scaled_v_img\n",
"Crop_Picture = scaled_sq_img\n",
"\n",
"\n",
"val = Horizontal_Picture\n",
"\n",
"cv2.imwrite('/content/stable-diffusion/ImageC/image_1.png', val)"
],
"metadata": {
"id": "GybhWqea_IAw",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title <----- 設定を適用して生成\n",
"\n",
"#@markdown 短くて単純なプロンプトは使わないで、複雑なプロンプトの方が良い結果が得られます。\n",
"prompt = \"concept idea, kawaii, beautiful face\" #@param {type:\"string\"}\n",
"StartImage = \"/content/stable-diffusion/ImageC/image_1.png\"\n",
"#@markdown 高いほど、画像の修正が大きくなります[0.1~1]。\n",
"Strength = 0.572 #@param {type:\"slider\", min:0, max:1, step:0.001}\n",
"\n",
"#@markdown ステップ数が多いほど良い画像が得られますが、150ステップ以上の使用はお勧めしません\n",
"steps = 20 #@param {type:\"slider\", min:1, max:150, step:1}\n",
"\n",
"\n",
"Height = 512\n",
"Width = 512\n",
"\n",
"import random\n",
"\n",
"#@markdown 設定\n",
"Samples = 1 #@param [\"1\", \"2\", \"3\", \"4\"] {type:\"raw\"}\n",
"Iteration = 1 #@param [\"1\", \"2\", \"3\", \"4\"] {type:\"raw\"}\n",
"Seed = random.randrange(9999999999)\n",
"CFGScale = 10 #@param {type:\"slider\", min:-2, max:20, step:0.1}\n",
"\n",
"\n",
"sampler = \"ddim\"\n",
"\n",
"if sampler == \"plms\":\n",
" plms = True\n",
"else:\n",
" plms = False\n",
"\n",
"#@title <---- スタートジェネレーター\n",
"import torch\n",
"grid_count = image2image(prompt = prompt, init_img = StartImage, strength = Strength, ddim_steps = steps, plms = plms, H = Height, W = Width, n_samples = Samples, n_iter = Iteration, seed = Seed, scale = CFGScale,)\n",
"torch.cuda.empty_cache()\n",
"\n",
"from IPython.display import clear_output \n",
"clear_output()\n",
"\n",
"#@title 結果\n",
"from google.colab.patches import cv2_imshow\n",
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"\n",
"img = cv2.imread(f\"/content/output/grid-{grid_count-1:04}.png\")\n",
"\n",
"cv2_imshow(img) "
],
"metadata": {
"id": "SZ1s_qlGJxPy",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title リサイクル結果\n",
"import os\n",
"\n",
"#@markdown これにより、生成される結果が準備されます。「Strength」の値が高すぎない場合、開始時の画像の一貫性を維持したまま、画像を深く修正することが可能です。\n",
"\n",
"old_file_name = f\"/content/output/grid-{grid_count-1:04}.png\"\n",
"new_file_name = \"/content/stable-diffusion/ImageC/image_1.png\"\n",
"\n",
"os.rename(old_file_name, new_file_name)\n",
"\n",
"print(\"The result is ready to be recycled\")"
],
"metadata": {
"cellView": "form",
"id": "-A_DCtR4sAf8"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title グレーエッジリムーバー(オプション)\n",
"from PIL import Image, ImageChops\n",
"\n",
"def trim(im):\n",
" bg = Image.new(\"RGB\", im.size, im.getpixel((0,0)))\n",
" diff = ImageChops.difference(im.convert(\"RGB\"), bg)\n",
" diff = ImageChops.add(diff, diff, 2.0, -100)\n",
" bbox = diff.getbbox()\n",
" if bbox:\n",
" return im.crop(bbox)\n",
"\n",
"im = Image.open(f\"/content/output/grid-{grid_count-1:04}.png\")\n",
"im = trim(im)\n",
"im\n"
],
"metadata": {
"id": "XX8htro9qUK3",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}
]
}