Kohya S. kohya-ss

@kohya-ss
kohya-ss / gradio_llm.py
Last active May 2, 2024 05:57
A simple client for using an LLM with gradio
# Apache License 2.0
# See the comments on the gist for usage
import argparse
from typing import List, Optional, Union, Iterator
from llama_cpp.llama_chat_format import _convert_completion_to_chat, register_chat_completion_handler
import llama_cpp.llama_types as llama_types
from llama_cpp.llama import LogitsProcessorList, LlamaGrammar
from llama_cpp import Llama, llama_chat_format
import gradio as gr
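
Only the imports survive in this preview; as a hedged sketch (not the gist's actual code; the model path, context size, and UI wiring are assumptions), such a client can look like this:

# Minimal sketch: stream chat completions from a local GGUF model into a
# gradio ChatInterface. "model.gguf" is a placeholder path.
import gradio as gr
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=4096)  # assumed path and context size

def respond(message, history):
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    partial = ""
    # create_chat_completion(stream=True) yields OpenAI-style delta chunks
    for chunk in llm.create_chat_completion(messages=messages, stream=True):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial

gr.ChatInterface(respond).launch()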
@kohya-ss
kohya-ss / gradio_cmdrp.py
Created April 18, 2024 13:09
Run command-r-plus with llama-cpp-python and gradio
# Apache License 2.0
# See the comments on the gist for usage
import argparse
from typing import List, Optional, Union, Iterator
from llama_cpp import Llama
from llama_cpp.llama_tokenizer import LlamaHFTokenizer
from llama_cpp.llama_chat_format import _convert_completion_to_chat, register_chat_completion_handler
import llama_cpp.llama_types as llama_types
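
Again only the imports are shown; a hedged sketch of loading Command R+ this way (the GGUF file name, HF repo id, and parameters are assumptions, not the gist's code):

# Pair the GGUF weights with the original HF tokenizer so the Command R+
# chat template is tokenized correctly; values below are placeholders.
from llama_cpp import Llama
from llama_cpp.llama_tokenizer import LlamaHFTokenizer

llm = Llama(
    model_path="command-r-plus-Q4_K_M.gguf",  # placeholder file name
    tokenizer=LlamaHFTokenizer.from_pretrained("CohereForAI/c4ai-command-r-plus"),
    n_ctx=8192,       # assumed context size
    n_gpu_layers=-1,  # offload all layers if VRAM allows
)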
@kohya-ss
kohya-ss / sdxl_train_te_leco.py
Last active March 21, 2024 12:12
Experimental implementation for training a LECO-like Text Encoder-only LoRA
# Specify the same arguments as for sdxl_train_network.py
# The --network_train_text_encoder_only option is required
#
# Set the source and target texts in src_str and tgt_str around line 260
# The tagger's selected_tags.csv is required, so adjust the path as needed
# It can be found here: https://huggingface.co/SmilingWolf/wd-v1-4-swinv2-tagger-v2/tree/main
#
# The "1girl" tag is always included, so change around line 820 if needed
#
# The following options cannot be specified (they will cause an error):
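
The header only describes the knobs; as a rough illustration of the LECO-like idea (my reading, not the gist's code), the objective can be sketched as pulling the trainable encoder's embedding of src_str toward the frozen encoder's embedding of tgt_str:

# Rough sketch of the presumed objective; the real script uses the sd-scripts
# SDXL text encoders and a LoRA network, not a bare CLIPTextModel.
import torch
from transformers import CLIPTextModel, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
frozen = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").eval()
trainable = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")  # LoRA would wrap this

src_str, tgt_str = "1girl, glasses", "1girl"  # placeholder source/target pair

def encode_ids(text):
    return tokenizer(text, padding="max_length", max_length=77,
                     truncation=True, return_tensors="pt").input_ids

with torch.no_grad():
    target = frozen(encode_ids(tgt_str)).last_hidden_state
pred = trainable(encode_ids(src_str)).last_hidden_state
loss = torch.nn.functional.mse_loss(pred, target)
loss.backward()  # one illustrative step; a real run loops with an optimizer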
@kohya-ss
kohya-ss / vae_vs_taesd_gradio.py
Last active March 18, 2024 23:17
Compare the decode results of VAE and TAESD, Gradio version
# Got this working after a lot of back and forth with Claude 3 Opus
# python vae_vs_taesd_gradio.py --image_dir /path/to/image/directory
import os
import argparse
import random
from PIL import Image
import torch
from diffusers import AutoencoderKL, AutoencoderTiny
import numpy as np
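
For context, the core comparison can be sketched like this (the model ids and scaling-factor handling are my assumptions, not necessarily what the gist does):

# Decode the same latent with the full VAE and with TAESD, then compare.
# TAESD expects latents already multiplied by the SD scaling factor.
import numpy as np
import torch
from PIL import Image
from diffusers import AutoencoderKL, AutoencoderTiny

vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
taesd = AutoencoderTiny.from_pretrained("madebyollin/taesd")

img = Image.open("sample.png").convert("RGB")  # placeholder; size should be a multiple of 8
x = torch.from_numpy(np.array(img)).float() / 127.5 - 1.0
x = x.permute(2, 0, 1).unsqueeze(0)

with torch.no_grad():
    latent = vae.encode(x).latent_dist.sample()
    decoded_vae = vae.decode(latent).sample
    decoded_taesd = taesd.decode(latent * vae.config.scaling_factor).sample
print("mean abs diff:", (decoded_vae - decoded_taesd).abs().mean().item())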
@kohya-ss
kohya-ss / vae_vs_taesd.py
Created March 17, 2024 23:32
Compare the decode results of VAE and TAESD
# Mostly written by Claude 3 Opus
# python vae_vs_taesd.py --image_dir /path/to/image/directory
import os
import argparse
import random
from PIL import Image, ImageTk
import torch
from diffusers import AutoencoderKL, AutoencoderTiny
import tkinter as tk
@kohya-ss
kohya-ss / fragment_of_sdxl_gen_img.py
Created January 22, 2024 23:48
Does something like chroma key compositing
each_control_net_enabled = [self.control_net_enabled] * len(self.control_nets)
for i, t in enumerate(tqdm(timesteps)):
# ↓ from here
# test: chroma key like composition
if latents.shape[0] == 4:
# run this script with batch size 4
# sample prompt for ANIMAGINE XL V3.0: 2nd prompt doesn't have details, because it is used for making the mask
# green surface of green screen --n color, artifact, object, shadow, frame --d 1
# 1girl, serafuku, standing, cowboy shot, green background, masterpiece, best quality --n nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name --d 1
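
A hedged sketch of the masking step those comments describe (my guess at the mechanics, not the actual fragment): threshold the decoded green-screen image into a mask and blend the two latents with it.

# Build a binary mask from a decoded green-screen image (pixels where green
# dominates) and use it to composite a foreground latent over a background.
import torch
import torch.nn.functional as F

def chroma_key_mask(rgb: torch.Tensor, margin: float = 0.3) -> torch.Tensor:
    # rgb: (3, H, W) in [0, 1]; returns 1.0 where the pixel is "green screen"
    r, g, b = rgb[0], rgb[1], rgb[2]
    return ((g - torch.maximum(r, b)) > margin).float()

def composite_latents(fg, bg, mask_rgb):
    # downscale the pixel-space mask to latent resolution (1/8 for SD/SDXL)
    mask = chroma_key_mask(mask_rgb)[None, None]
    mask = F.interpolate(mask, size=fg.shape[-2:], mode="bilinear")
    return fg * (1.0 - mask) + bg * mask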
import comfy
from comfy.samplers import KSAMPLER
import torch
from torchvision.transforms.functional import gaussian_blur
from comfy.k_diffusion.sampling import default_noise_sampler, get_ancestral_step, to_d, BrownianTreeNoiseSampler
from tqdm.auto import trange
@torch.no_grad()
def sample_euler_ancestral(
@kohya-ss
kohya-ss / patch_for_gen_img_diffusers.py
Created November 19, 2023 06:11
Diff against gen_img_diffusers.py for the GradualLatent highres fix
# how much to increase the scale at each step: 0.125 seems to work well (probably because it's 1/8)
scale_step = 0.125
# timesteps at which to start increasing the scale: model and prompt dependent
start_timesteps = 800
# how many steps to wait before increasing the scale again: smaller values lead to more artifacts, also depends on the total number of steps
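
A standalone sketch of the schedule these parameters describe (the actual change is a diff to gen_img_diffusers.py; this helper and the every_n_steps default are assumptions):

# Once the timestep falls below start_timesteps, enlarge the latent by
# scale_step every every_n_steps sampler steps until full size is reached.
import torch
import torch.nn.functional as F

class GradualLatent:
    def __init__(self, scale_step=0.125, start_timesteps=800,
                 every_n_steps=5, initial_scale=0.5):
        self.scale_step = scale_step
        self.start_timesteps = start_timesteps
        self.every_n_steps = every_n_steps  # hypothetical default
        self.scale = initial_scale          # start from a smaller latent
        self.steps_waited = 0

    def step(self, latents: torch.Tensor, timestep: float, full_size) -> torch.Tensor:
        # full_size: (H, W) of the full-resolution latent
        if timestep >= self.start_timesteps or self.scale >= 1.0:
            return latents
        self.steps_waited += 1
        if self.steps_waited < self.every_n_steps:
            return latents
        self.steps_waited = 0
        self.scale = min(1.0, self.scale + self.scale_step)
        size = (int(full_size[0] * self.scale), int(full_size[1] * self.scale))
        return F.interpolate(latents, size=size, mode="bicubic")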
@kohya-ss
kohya-ss / forward_of_sdxl_original_unet.py
Created November 14, 2023 03:39
Reduce composition breakdown at high resolutions in SDXL
def forward(self, x, timesteps=None, context=None, y=None, **kwargs):
# broadcast timesteps to batch dimension
timesteps = timesteps.expand(x.shape[0])
hs = []
t_emb = get_timestep_embedding(timesteps, self.model_channels) # , repeat_only=False)
t_emb = t_emb.to(x.dtype)
emb = self.time_embed(t_emb)
assert x.shape[0] == y.shape[0], f"batch size mismatch: {x.shape[0]} != {y.shape[0]}"
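
The preview stops at the top of forward(); as a hedged illustration of the kind of intervention the description suggests (an assumption in the spirit of the Deep Shrink experiments, not the verbatim gist), the hidden states of an early down block can be downscaled while the timestep is still high:

# Shrink hidden states early in denoising so global composition is decided at
# a lower effective resolution; values below are illustrative assumptions.
import torch
import torch.nn.functional as F

def maybe_shrink(h: torch.Tensor, timesteps: torch.Tensor,
                 ds_timestep: int = 650, ratio: float = 0.5) -> torch.Tensor:
    # h: (B, C, H, W) hidden states from a down block
    if timesteps.max().item() > ds_timestep:
        h = F.interpolate(h, scale_factor=ratio, mode="bicubic")
    return h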