@twobob
Created November 29, 2023 17:48
SSD Segmind bot with BLIP
#!/usr/bin/env python
import os
import random
import uuid
import subprocess
from typing import List, Optional
from tqdm import tqdm
import re
import time
### optional automated install
'''
def check_and_install(lib_name_mapping):
for lib, runtime_name in lib_name_mapping.items():
try:
__import__(runtime_name)
print(f'{runtime_name} is already installed.')
except ImportError:
print(f'Installing {runtime_name}...')
install_cmd = ['pip', 'install', runtime_name]
subprocess.run(install_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f'{runtime_name} has been installed.')
# Library name mapping
lib_name_mapping = {
'discord': 'discord',
'transformers': 'transformers',
'aiohttp': 'aiohttp',
'numpy': 'numpy',
'PIL': 'Pillow',
'torch': 'torch',
'diffusers': 'diffusers',
'controlnet_aux': 'controlnet_aux',
'dotenv': 'python-dotenv',
'compel': 'compel',
'gradio': 'gradio',
'clip_interrogator': 'clip-interrogator',
'cv2': 'opencv-contrib-python',
}
check_and_install(lib_name_mapping)
'''
## clip-interrogator-0.6.0 huggingface-hub-0.19.4 open_clip_torch-2.23.0 tokenizers-0.15.0 protobuf-4.25.1
try:
import cv2
except ImportError:
print('Issue importing the cv2 module. Please install `pip install opencv-contrib-python`')
try:
import discord
from discord import app_commands
except ImportError:
print('Issue importing the discord module. Please install `pip install discord`')
try:
from transformers import pipeline , CLIPTokenizer, AutoImageProcessor, UperNetForSemanticSegmentation #, AutoModelForCausalLM , pipeline
except ImportError:
print('Issue importing the transformers module. Please install `pip install transformers`')
try:
import aiohttp
except ImportError:
print('Issue importing the aiohttp module. Please install `pip install aiohttp`')
try:
import numpy as np
except ImportError:
print('Issue importing the numpy module. Please install `pip install numpy`')
try:
from PIL import Image, ImageDraw, ImageFont
except ImportError:
print('Issue importing the PIL module. Please install `pip install Pillow`')
try:
import torch
from torch import autocast
except ImportError:
print('Issue importing the torch module. Please install `pip install torch`')
try:
from diffusers import LCMScheduler, AutoPipelineForText2Image, AutoencoderKL, StableDiffusionControlNetPipeline, StableDiffusionXLPipeline, DiffusionPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
except ImportError:
print('Issue importing the diffusers module. Please install `pip install diffusers`')
try:
from controlnet_aux import OpenposeDetector
except ImportError:
print('Issue importing the controlnet_aux module. Please install `pip install controlnet_aux`')
try:
from dotenv import load_dotenv
except ImportError:
print('Issue importing the dotenv module. Please install `pip install python-dotenv`')
try:
from compel import Compel, ReturnedEmbeddingsType
except ImportError:
print('Issue importing the compel module. Please install `pip install git+https://github.com/damian0815/compel/`')
try:
#import gradio as gr
from clip_interrogator import Config, Interrogator
except ImportError:
print('Issue importing the clip_interrogator module. Please install `pip install open-clip-torch clip-interrogator`')
if not os.path.isfile('./realesrgan-ncnn-vulkan.exe'):
print('Cannot find `./realesrgan-ncnn-vulkan.exe` in the current working directory')
# create env file if it does not exist with default values
# Read from it afterwards
#
if not os.path.isfile('.env'):
with open(".env", "w") as f:
f.write("DISCORD_BOT_TOKEN=\n")
f.write("CACHE_EXAMPLES=1\n")
f.write("MAX_IMAGE_SIZE=1024\n")
f.write("USE_TORCH_COMPILE=1\n")
f.write("ENABLE_CPU_OFFLOAD=0\n")
f.write("ENABLE_REFINER=0")
# Load settings from the .env file
load_dotenv()
from lists import get_random_terms, seg_palette, get_random_mix
caption_model_name = 'blip-large' #@param ["blip-base", "blip-large", "git-large-coco"]
clip_model_name = 'ViT-L-14/openai' #@param ["ViT-L-14/openai", "ViT-H-14/laion2b_s32b_b79k"]
ci = None
def image_analysis(imagepath):
image = load_image(imagepath)
image = image.convert('RGB')
image_features = ci.image_to_features(image)
top_mediums = ci.mediums.rank(image_features, 5)
top_artists = ci.artists.rank(image_features, 5)
top_movements = ci.movements.rank(image_features, 5)
top_trendings = ci.trendings.rank(image_features, 5)
top_flavors = ci.flavors.rank(image_features, 5)
medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
movement_ranks = {movement: sim for movement, sim in zip(top_movements, ci.similarities(image_features, top_movements))}
trending_ranks = {trending: sim for trending, sim in zip(top_trendings, ci.similarities(image_features, top_trendings))}
flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks
def image_to_prompt(imagepath, mode):
ci.config.chunk_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
image = load_image(imagepath)
image = image.convert('RGB')
if mode == 'best':
return ci.interrogate(image)
elif mode == 'classic':
return ci.interrogate_classic(image)
elif mode == 'fast':
return ci.interrogate_fast(image)
elif mode == 'negative':
return ci.interrogate_negative(image)
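# Illustrative usage (assuming `ci` has already been initialised with an Interrogator config):
#   image_to_prompt("input.png", "fast")      -> a caption-style prompt string for the image
#   image_to_prompt("input.png", "negative")  -> terms CLIP associates weakly with the image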
# Discord Bot Setup
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
tree = discord.app_commands.CommandTree(client)
# Environment Setup and Model Initialization
MAX_SEED = np.iinfo(np.int32).max
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "1") == "1"
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "1") == "1"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
ENABLE_REFINER = os.getenv("ENABLE_REFINER", "0") == "1"
DEFAULT_SCALE = 1
IMAGE_COUNT = 1
MAX_STEPS = 100
unique_name = "oops.png"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
### Model query defaults
GUIDANCE_DEFAULT = 1.0
STEPS_DEFAULT = 14
WIDTH_DEFAULT = 800
HEIGHT_DEFAULT = 1280
IMAGES_COUNT_DEFAULT = 6
TARGET_STRENGTH_DEFAULT = 0.5
NEGATIVE_PROMPT_DEFAULT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
DEFAULT_FONT_LOCATION = "C:\Windows\WinSxS\amd64_microsoft-windows-font-truetype-arial_31bf3856ad364e35_10.0.22621.1_none_d4193be3a119442b\arial.ttf" # Path to a .ttf font file
DEFAULT_IMAGES_FOLDER_PREFIX = ".\\image\\"
CAPTION_MODEL_DEFAULT= "blip-base", # default value for caption model
CLIP_MODEL_DEFAULT = "ViT-L-14/openai", # default value for clip model
CAPTION_TYPE_DEFAULT= "image_to_prompt" # default value for captioning type
###
vae = None
pipe = None
refiner = None
control_net_image_path = None
upscale_image_size = None
target_conditioning = None
target_pooled = None
target_prompt = ""
negative_target_prompt = ""
negative_target_conditioning = None
negative_target_pooled = None
#negative_prompt_two = ""
org_init_image_size = 0
adapter_id = "latent-consistency/lcm-lora-ssd-1b"
if torch.cuda.is_available():
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = AutoPipelineForText2Image.from_pretrained(
#pipe = StableDiffusionXLPipeline.from_pretrained(
#"stabilityai/stable-diffusion-xl-base-1.0",
"segmind/SSD-1B",
vae=vae,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
if ENABLE_REFINER:
refiner = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
vae=vae,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
if ENABLE_CPU_OFFLOAD:
pipe.enable_model_cpu_offload()
if refiner is not None:
refiner.enable_model_cpu_offload()
else:
pipe.to(device)
if refiner is not None:
refiner.to(device)
pipe.load_lora_weights(adapter_id)
pipe.fuse_lora()
if USE_TORCH_COMPILE and not os.name == 'nt': # skip torch.compile on Windows, where it is not supported
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
if refiner is not None:
refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
compel = Compel(
tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,
text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
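# Compel turns a (optionally weighted) prompt string into the embedding tensors the SDXL-style
# pipeline expects. Rough sketch of how it is used further down (the "++" up-weighting is illustrative):
#   conditioning, pooled = compel("a portrait of a knight++ in fog")
#   image = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, ...).images[0]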
# Image To image Pipeline
model_id_or_path = "segmind/SSD-1B"
#pipe_image_to_image = StableDiffusionXLImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
#pipe_image_to_image = pipe.to('cuda')
#init_image = Image.open("wub.png").convert("RGB").resize((768, 512))
#prompt = "A fantasy landscape, trending on artstation"
#images = pipe_image_to_image(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
#images[0].save("fantasy_landscape.png")
# AUTO PROMPT GENERATION
#sd_tokenizer = AutoTokenizer.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
#sd_model = AutoModelForCausalLM.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
sd_pipeline = pipeline('text-generation', model='Gustavosta/MagicPrompt-Stable-Diffusion', max_length=128, pad_token_id=0)
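# MagicPrompt is a GPT-2 model fine-tuned to extend Stable Diffusion prompts; it is used in the
# `auto` branch below, roughly like:
#   sd_pipeline("a castle on a hill,", num_return_sequences=1)[0]["generated_text"]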
def save_image(img, add_watermark=True):
global DEFAULT_SCALE
# Load a font
if add_watermark:
font_path = DEFAULT_FONT_LOCATION
try:
font = ImageFont.truetype(font_path, 30) # Adjust the size to fit your needs
except IOError:
font = ImageFont.load_default()
# Add watermark
watermark_text = "AI Enthusiasts"
draw = ImageDraw.Draw(img)
# Calculate the bounding box at (0, 0) position
bbox = draw.textbbox((0, 0), watermark_text, font=font)
# Calculate text width and height from bbox
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Calculate x, y for bottom right position
x = img.width - text_width - 20 # 20 pixels from the right
y = img.height - text_height - 20 # 20 pixels from the bottom
# Draw the text
draw.text((x, y), watermark_text, font=font, fill=(127, 127, 128))
# Save the image with a unique name
unique_name = str(uuid.uuid4()) + '.png'
img.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)))
upscale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_upscale_'+unique_name)
if DEFAULT_SCALE != 1:
if img.width * img.height > 819200 and DEFAULT_SCALE > 3:
DEFAULT_SCALE = 3
if img.width * img.height > 1310720 and DEFAULT_SCALE > 2:
DEFAULT_SCALE = 2
if DEFAULT_SCALE < 1:
w, h = img.size
new_w = int(w * DEFAULT_SCALE)
new_h = int(h * DEFAULT_SCALE)
processed_img = img.resize((new_w, new_h))
processed_img.save(upscale_name)
else:
# Run the executable
subprocess.run(['./realesrgan-ncnn-vulkan.exe', '-i', str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)), '-o', upscale_name, '-s', str(DEFAULT_SCALE)], check=True)
# Load the processed image
processed_img = Image.open(upscale_name)
return unique_name
def randomize_seed_fn(seed: int) -> int:
seed = random.randint(0, MAX_SEED)
return seed
def is_empty_string(s):
return s is None or not s.strip()
# Function to generate images
# The complete generate_for_discord function
def generate_for_discord(
prompt: str,
width: int = WIDTH_DEFAULT,
height: int = HEIGHT_DEFAULT,
guidance_scale: float = GUIDANCE_DEFAULT,
num_inference_steps: int = STEPS_DEFAULT,
apply_refiner: bool = True,
scale: float = 1.0,
seed: int = 0,
#negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typopgraphy, signature, signed"
negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
#image_to_image: bool = False,
#image_to_image_strength: float = 0.3,
auto :bool = False,
#image_to_image_prompt: str = ""
#quality_terms :int = 0,
#lighting_terms: int = 0,
#media_terms : int =0,
#random_real_artists :int =0,
#style_terms: int=0,
random_terms: bool=False,
#controlnet_type: str="Depth",
init_image_path: str = None,
#caption_model: str = CAPTION_MODEL_DEFAULT,# "blip-base", # default value for caption model
#clip_model: str = CLIP_MODEL_DEFAULT,# "ViT-L-14/openai", # default value for clip model
#captioning_type: str = CAPTION_TYPE_DEFAULT,# "image_analysis" # default value for captioning type
target_mode: bool=False,
target_strength: float = TARGET_STRENGTH_DEFAULT,
):
global IMAGE_COUNT
init_image = None
control_net_image_path = None
latents = None
image = None
image_path = None
generator = torch.Generator().manual_seed(seed)
if num_inference_steps > MAX_STEPS // IMAGE_COUNT :
num_inference_steps = MAX_STEPS // IMAGE_COUNT
target_strength = max(0, min(target_strength, 1))
conditioning, pooled = compel(prompt)
negative_conditioning, negative_pooled = compel(negative_prompt)
if init_image_path is not None and target_mode:
conditioning = conditioning - ((conditioning - target_conditioning) * target_strength )
pooled = pooled - ((pooled - target_pooled)* target_strength )
negative_conditioning = negative_conditioning - ((negative_conditioning - negative_target_conditioning) * target_strength )
negative_pooled = negative_pooled - ((negative_pooled - negative_target_pooled) * target_strength)
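# Target mode nudges the prompt embeddings towards the embeddings of the caption extracted from the
# init image. Each line above is a plain linear interpolation:
#   new = a - (a - target) * strength  ==  a * (1 - strength) + target * strength
# so strength 0 keeps the typed prompt unchanged and strength 1 uses the captioned target entirely.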
#if image_to_image_prompt == "":
# image_to_image_prompt = prompt
# add support for second prompt embeddings
#i2i_conditioning, i2i_pooled = compel(image_to_image_prompt)
#image_to_image_path = "not_set"
#if init_image_path is None:
if not apply_refiner or refiner is None:
with torch.autocast("cuda"):
image = pipe(
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
seed=seed,
#negative_prompt=negative_prompt,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
width=width,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator
).images[0]
else:
with torch.autocast("cuda"):
latents = pipe( # output_type="latent" below makes the base pipeline return latents for the refiner
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
width=width,
seed=seed,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
#negative_prompt=negative_prompt,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator,
output_type="latent"
).images
image = refiner(
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
seed=seed,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
#negative_prompt=negative_prompt,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
latents=latents,
generator=generator,
).images[0]
image_path = save_image(image)
mask_layer_image_path=None
print(("image_path", image_path, "seed", seed) if init_image_path is not None else ("control_net_image_path", control_net_image_path, "seed", seed))
#print ("image_to_image_path", image_to_image_path, "seed", seed, "current seed", i2i_seed)
#return image_path, image_to_image_path, seed, num_inference_steps, prompt, i2i_seed
return image_path, seed, num_inference_steps, prompt, control_net_image_path, mask_layer_image_path
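# NOTE: everything between the triple quotes below is disabled ControlNet handling
# (Canny, Segmentation, Depth, Normal and OpenPose paths) kept for reference; none of it runs.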
# canny
'''
if init_image_path is not None and controlnet_type == "Canny":
model_id_or_path = "segmind/SSD-1B"
# load the controlnet model for canny edge detection
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
#controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
# load the stable diffusion pipeline with controlnet
#controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
#controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, torch_dtype=torch.float16)
#pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
#controlnet_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
controlnet_pipe.scheduler = LCMScheduler.from_config(controlnet_pipe.scheduler.config)
# load LCM-LoRA
controlnet_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# enable efficient implementations using xformers for faster inference
controlnet_pipe.enable_xformers_memory_efficient_attention()
controlnet_pipe.enable_model_cpu_offload()
#### DO
image_input = load_image(init_image_path) # Assuming load_image is defined
image_input = np.array(image_input)
# Define parameters for canny edge detection
low_threshold = 100
high_threshold = 200
# Do canny edge detection
image_canny = cv2.Canny(image_input, low_threshold, high_threshold)
image_canny = image_canny[:, :, None]
image_canny = np.concatenate([image_canny, image_canny, image_canny], axis=2)
image_canny = Image.fromarray(image_canny)
mask_layer_image_path = save_image(image_canny, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
# Function call with dynamic arguments
image_output = controlnet_pipe(prompt= prompt,image=image_canny, **call_args).images[0]
control_net_image_path = save_image(image_output)
# add second timeline handling where an alternate prompt was entered and we have recompute our compel embeddings and NOT increment the seed.
# seg map
if init_image_path is not None and controlnet_type == "Segmentation":
### SEGEMENTATION
# load the image processor and the model for doing segmentation
image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
# load the controlnet model for semantic segmentation
seg_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-seg", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
seg_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
)
#seg_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
seg_pipe.scheduler = LCMScheduler.from_config(seg_pipe.scheduler.config)
# load LCM-LoRA
seg_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
seg_pipe.enable_xformers_memory_efficient_attention()
seg_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path) # Assuming load_image is defined
image_input = np.array(image_input)
# get the pixel values
pixel_values = image_processor(image_input, return_tensors="pt").pixel_values
# do semantic segmentation
with torch.no_grad():
outputs = image_segmentor(pixel_values)
print(image_input.size)
# post process the semantic segmentation
seg = image_processor.post_process_semantic_segmentation(outputs,target_sizes=[(width, height)])[0]
# Assuming 'known_height' is the height you want to use
#seg = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[(image_input.size, known_height)])[0]``
# add colors to the different identified classes
color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
for label, color in enumerate(seg_palette):
color_seg[seg == label, :] = color
# convert into PIL image format
color_seg = color_seg.astype(np.uint8)
image_seg = Image.fromarray(color_seg)
mask_layer_image_path = save_image(image_seg, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = seg_pipe(prompt, image_seg, **call_args).images[0]
control_net_image_path = save_image(image_output)
# DEPTH
if init_image_path is not None and controlnet_type == "Depth":
### DEPTH
# load the depth estimator model
depth_estimator = pipeline('depth-estimation')
# load the controlnet model for depth estimation
depth_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
depth_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=depth_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#depth_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
depth_pipe.scheduler = LCMScheduler.from_config(depth_pipe.scheduler.config)
# load LCM-LoRA
depth_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# enable efficient implementations using xformers for faster inference
depth_pipe.enable_xformers_memory_efficient_attention()
depth_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path)
image_input = image_input
# get depth estimates
image_depth = depth_estimator(image_input)['depth']
# convert to PIL image format
image_depth = np.array(image_depth)
image_depth = image_depth[:, :, None]
image_depth = np.concatenate([image_depth, image_depth, image_depth], axis=2)
image_depth = Image.fromarray(image_depth)
mask_layer_image_path = save_image(image_depth, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = depth_pipe(prompt, image_depth, **call_args).images[0]
control_net_image_path = save_image(image_output)
## Normals
if init_image_path is not None and controlnet_type == "Normal":
### NORMALS
# load the Dense Prediction Transformer (DPT) model for getting normal maps
normal_depth_estimator = pipeline("depth-estimation", model ="Intel/dpt-hybrid-midas")
# load the controlnet model for normal maps
normal_controlnet = ControlNetModel.from_pretrained(
"fusing/stable-diffusion-v1-5-controlnet-normal", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
normal_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=normal_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#normal_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
normal_pipe.scheduler = LCMScheduler.from_config(normal_pipe.scheduler.config)
# load LCM-LoRA
normal_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
normal_pipe.enable_xformers_memory_efficient_attention()
normal_pipe.enable_model_cpu_offload()
### DO
#image_input = load_image(init_image_path)
#image_input = np.array(image_input)
# do all the preprocessing to get the normal image
image = normal_depth_estimator(init_image_path)['predicted_depth'][0]
image = image.numpy()
image_depth = image.copy()
image_depth -= np.min(image_depth)
image_depth /= np.max(image_depth)
bg_threshold = 0.4
x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
x[image_depth < bg_threshold] = 0
y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
y[image_depth < bg_threshold] = 0
z = np.ones_like(x) * np.pi * 2.0
image = np.stack([x, y, z], axis=2)
image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
image_normal = Image.fromarray(image)
mask_layer_image_path = save_image(image_normal, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = normal_pipe(prompt, image_normal, **call_args).images[0]
control_net_image_path = save_image(image_output)
## OPENPOSE
if init_image_path is not None and controlnet_type == "OpenPose":
### OPEN POSE
# load the openpose model
#openpose = controlnet_aux.OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
# load the controlnet for openpose
openpose_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
)
# define stable diffusion pipeline with controlnet
openpose_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=openpose_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#openpose_pipe.scheduler = UniPCMultistepScheduler.from_config(openpose_pipe.scheduler.config)
# set scheduler
openpose_pipe.scheduler = LCMScheduler.from_config(openpose_pipe.scheduler.config)
# load LCM-LoRA
openpose_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
openpose_pipe.enable_xformers_memory_efficient_attention()
openpose_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path)
image_input = np.array(image_input)
image_pose = openpose(image_input)
mask_layer_image_path = save_image(image_pose, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = openpose_pipe(prompt, image_pose, **call_args).images[0]
control_net_image_path = save_image(image_output)
#i2i_seed = seed
#if (image_to_image):
# if (image_to_image_prompt == ""):
# i2i_seed = i2i_seed + 1
#process_img = False
# the case where we pass both
#if not is_empty_string(init_image_path) and image_to_image:
# init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# the case where we pass only image_to_image
#elif image_to_image and is_empty_string(init_image_path) :
# init_image = Image.open(image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# the case where we pass only init_image_path
#elif init_image_path and not image_to_image :
# init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# we now compute new compel embeddings and increment our seed by one conditionally
#if process_img:
# image = pipe_image_to_image(
# prompt=image_to_image_prompt,
# #prompt_embeds=i2i_conditioning,
# #pooled_prompt_embeds=i2i_pooled,
# image=init_image,
# strength=image_to_image_strength,
# seed=i2i_seed,
# negative_prompt=negative_prompt,
# width=width,
# height=height,
# guidance_scale=guidance_scale,
# num_inference_steps=num_inference_steps,
# generator=generator
# ).images[0]
# image_to_image_path = save_image(image)
#if init_image_path and image_to_image:
# image_to_image_path = save_image(init_image)
#if not image_to_image:
# image_to_image_path = None
'''
# Discord Bot Commands
@client.event
async def on_ready():
print(f'Logged in as {client.user}')
await tree.sync()
dimension_choices = [
app_commands.Choice(name="512", value=512),
app_commands.Choice(name="600", value=600),
app_commands.Choice(name="720", value=720),
app_commands.Choice(name="768", value=768),
app_commands.Choice(name="800", value=800),
app_commands.Choice(name="1024", value=1024),
app_commands.Choice(name="1280", value=1280),
app_commands.Choice(name="1440", value=1440),
app_commands.Choice(name="1600", value=1600),
app_commands.Choice(name="1920", value=1920),
app_commands.Choice(name="2048", value=2048),
]
img_choices = [
app_commands.Choice(name="6", value=6),
app_commands.Choice(name="5", value=5),
app_commands.Choice(name="4", value=4),
app_commands.Choice(name="3", value=3),
app_commands.Choice(name="2 (good for scale 2)", value=2),
app_commands.Choice(name="1 (good for scale 3 or 4)", value=1),
]
controlnet_choices =[
app_commands.Choice(name="Canny", value="Canny"),
app_commands.Choice(name="Depth", value="Depth"),
app_commands.Choice(name="Normal", value="Normal"),
app_commands.Choice(name="Segmentation", value="Segmentation"),
app_commands.Choice(name="OpenPose", value="OpenPose"),
]
scale_choices = [
app_commands.Choice(name="1 no upscale (default)", value=1.0),
app_commands.Choice(name="2 (1280x1024 max)", value=2.0),
app_commands.Choice(name="3 (800x800 max)", value=3.0),
app_commands.Choice(name="4 (600x600 max)", value=4.0),
app_commands.Choice(name="90%", value=0.90),
app_commands.Choice(name="75%", value=0.75),
app_commands.Choice(name="66%", value=0.66),
app_commands.Choice(name="50%", value=0.5),
app_commands.Choice(name="33%", value=0.3),
]
caption_model_choices = [
app_commands.Choice(name="blip-base", value="blip-base"),
app_commands.Choice(name="blip-large", value="blip-large"),
app_commands.Choice(name="git-large-coco", value="git-large-coco"),
]
clip_model_choices = [
app_commands.Choice(name="ViT-L-14/openai", value="ViT-L-14/openai"),
app_commands.Choice(name="ViT-H-14/laion2b_s32b_b79k", value="ViT-H-14/laion2b_s32b_b79k"),
]
clip_mode_choices = [
app_commands.Choice(name='best', value= 'best' ),
app_commands.Choice(name='fast', value= 'fast' ),
app_commands.Choice(name='classic', value= 'classic' ),
app_commands.Choice(name='negative', value= 'negative'),
]
captioning_type_choices = [
app_commands.Choice(name="image_analysis", value="image_analysis"),
app_commands.Choice(name="image_to_prompt", value="image_to_prompt"),
app_commands.Choice(name="image_to_prompt and analysis", value="both"),
]
# Utility function to safely extract the integer value
def get_int_value(choice_or_int):
return choice_or_int.value if hasattr(choice_or_int, 'value') else choice_or_int
async def download_image_attachment(attachment: discord.Attachment) -> str:
# Specify the directory where you want to save the image
save_directory = "./"
os.makedirs(save_directory, exist_ok=True)
# Construct the full path where the image will be saved
file_path = os.path.join(save_directory, attachment.filename)
# Download the image
async with aiohttp.ClientSession() as session:
async with session.get(attachment.url) as response:
if response.status == 200:
# Write the image to a file
with open(file_path, 'wb') as f:
f.write(await response.read())
else:
raise Exception(f"Failed to download image: HTTP {response.status}")
return file_path
@tree.command(name="img", description="Generate an image based on a prompt")
@app_commands.describe(prompt="The description for the image to generate")
@app_commands.choices(
width=dimension_choices,
height=dimension_choices,
scale=scale_choices,
img_count=img_choices,
caption_model=caption_model_choices,
clip_model=clip_model_choices,
captioning_type=captioning_type_choices,
clip_mode=clip_mode_choices,
#controlnet_type=controlnet_choices
)
@app_commands.describe(
prompt="The description for the image to generate",
seed="The seed for the image generation (random default)",
width="The width of the image (1024 default)",
height="The height of the image (1024 default)",
guidance=f"The guidance scale for image generation ({GUIDANCE_DEFAULT} default)",
steps=f"The number of inference steps ({STEPS_DEFAULT} default)",
refine="Whether to apply the refiner (True default)",
scale="upscale the image (1x default)" ,
img_count=f"how many in the batch? ({IMAGES_COUNT_DEFAULT} default)",
negative_prompt="add terms you dont want to see (optional)",
#image_to_image="Whether to Image to Image (False default)",
#image_to_image_strength="The strength of the Image to Image transfer (0.3 default)",
auto="Use prompt AI to extend prompt (False default)",
#image_to_image_prompt="The description for the image_to_image to generate",
#quality_terms="The number of quality terms to include in the image description (0 default)",
#lighting_terms="The number of lighting terms to include in the image description (0 default)",
#media_terms="The number of media terms to include in the image description (0 default)",
#random_real_artists="The number of random real artist names to include in the image description (0 default)",
#style_terms="The number of style terms to include in the image description (0 default)",
random_terms="Will include one random style, lighting quality and media terms with one real artist name (False default)",
#controlnet_type="The type of control network processing to apply (e.g., Canny, Depth)",
init_image="Initial image for Image to Image generation (optional)",
init_image_hidden="DO NOT SHOW init_image in outputs",
caption_model="Select the model for image captioning: 'blip-base', 'blip-large', or 'git-large-coco' (blip-large Default)",
clip_model="Choose the CLIP model for image analysis: 'ViT-L-14/openai' or 'ViT-H-14/laion2b_s32b_b79k' (ViT-L-14 Default)",
clip_mode="Choose the CLIP mode for image analysis: 'best', 'fast', 'classic', 'negative' (best Default)",
captioning_type="Type of captioning: 'image_analysis' for descriptive captions, 'image_to_prompt' for creative prompts",
target_mode="try to coerce one set of weights towards another",
target_strength=f"how much do we try to move towards the target weights 0 -> 1 ({TARGET_STRENGTH_DEFAULT} Default)"
)
async def img(
ctx: discord.Interaction,
prompt: str,
width: int = WIDTH_DEFAULT, # The parameter will be an int or a Choice object 1440
height: int = HEIGHT_DEFAULT, # The parameter will be an int or a Choice object 720
guidance: float = GUIDANCE_DEFAULT, # 1.0
steps: int = STEPS_DEFAULT, # 14
refine: bool = True,
scale: float = 1.0,
seed: int = 0,
img_count: int = IMAGES_COUNT_DEFAULT, # 6
#negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typopgraphy, signature, signed"
negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
#image_to_image: bool = False,
#image_to_image_strength: float = 0.3,
auto :bool = False,
#image_to_image_prompt: str = ""
#quality_terms :int = 0,
#lighting_terms: int = 0,
#media_terms : int =0,
#random_real_artists :int =0,
#style_terms: int=0,
random_terms: bool=False,
#controlnet_type: str = "Depth",
init_image: discord.Attachment = None, # New parameter for initial image
init_image_hidden:bool=False,
caption_model: str = "blip-large",#' CAPTION_MODEL_DEFAULT,# "blip-base", # default value for caption model
clip_model: str = "ViT-L-14/openai",# CLIP_MODEL_DEFAULT,# "ViT-L-14/openai", # default value for clip model
clip_mode: str = "best",# 'fast', 'classic', 'negative'
captioning_type: str = "image_to_prompt",#CAPTION_TYPE_DEFAULT,# "image_to_prompt" # default value for captioning type
target_mode: bool=False,
target_strength: float = TARGET_STRENGTH_DEFAULT,
):
global DEFAULT_SCALE
global IMAGE_COUNT
global org_init_image_size
global ci
global target_conditioning
global target_pooled
global negative_target_conditioning
global negative_target_pooled
global target_prompt
global negative_target_prompt
process_start_time = time.time() # Start timing
IMAGE_COUNT = get_int_value(img_count) # honour the requested batch size
DEFAULT_SCALE = scale
# Extract integer values safely
width_value = get_int_value(width)
height_value = get_int_value(height)
# Seed handling logic
seed = seed if (seed is not None) and (seed > 0) else randomize_seed_fn(seed)
await ctx.response.defer()
images = []
descriptions = []
captioning_processing_time = 0
print(f"original prompt {prompt}")
# ADD PROMPT REQUESTS
#value_list = {
# "quality": quality_terms,
# "lighting": lighting_terms,
# "media": media_terms,
# "real_artists": random_real_artists,
# "style": style_terms,
#}
# Building the prompt using list comprehension
#prompt +=" " + " ".join(get_random_terms(list_name, value_list[list_name]) for list_name in value_list.keys())
total_size = 0
max_size_mb = 7.999
# Handling the initial image if provided
init_image_path = None
if init_image is not None:
# Download the image attachment
init_image_path = await download_image_attachment(init_image)
if init_image_path is not None:
config = Config()
config.clip_model_name = clip_model
config.caption_model_name = caption_model
ci = Interrogator(config)
start_time = time.time() # Start timing
if not target_mode:
if captioning_type == "image_to_prompt" or captioning_type == "both":
prompt +=" " + image_to_prompt(init_image_path, clip_mode)
negative_prompt = image_to_prompt(init_image_path, 'negative')
if target_mode:
#target_prompt = ""
#negative_target_prompt = ""
if captioning_type == "image_to_prompt" or captioning_type == "both":
target_prompt = image_to_prompt(init_image_path, clip_mode)
negative_target_prompt = image_to_prompt(init_image_path, 'negative')
target_conditioning, target_pooled = compel(target_prompt)
negative_target_conditioning, negative_target_pooled = compel(negative_target_prompt)
if captioning_type == "image_analysis" or captioning_type == "both":
medium, artist, movement, trending, flavor = image_analysis(init_image_path)
# Iterate through each attribute list and add to the prompt
for rank_dict in [medium, artist, movement, trending, flavor]:
for attribute, similarity in rank_dict.items():
prompt += f'{attribute}, '
#prompt += f'{attribute} ({similarity:.2f}), '
# Remove the trailing comma and space from the prompt
prompt = prompt.strip(', ')
end_time = time.time() # End timing
captioning_processing_time = end_time - start_time
if random_terms:
prompt +=" " + get_random_mix()
# Load the CLIP tokenizer
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
# Tokenize the text
tokenized_prompt = tokenizer.encode(prompt)
# if init_image_path is not None:
# print(f"Extended prompt: {prompt}")
# print(f" len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)} ")
if auto:
while len(tokenized_prompt) < 70:
prompt = sd_pipeline(prompt+',', num_return_sequences=1)[0]["generated_text"]
# Replace multiple spaces, newlines, and commas with a single space or comma respectively
prompt = re.sub(r'\s+', ' ', prompt)
prompt = re.sub(r',+', ',', prompt).strip()
print(f"Extended prompt: {prompt}")
# Tokenize the text
tokenized_prompt = tokenizer.encode(prompt)
#if len(tokenized_prompt) > 77:
# print(f"trimming to 77 Tokens")
# Truncate the prompt if it exceeds 77 tokens
#while len(tokenized_prompt) > 77:
# Remove the last word
# prompt = ' '+' '.join(prompt.split(' ')[:-1])
# # Re-tokenize
# tokenized_prompt = tokenizer.encode(prompt)
# Replace multiple spaces and newlines with a single space
prompt = re.sub(r'\s+', ' ', prompt).strip()
if init_image_path is not None:
print(f"Extended prompt: {prompt}")
print(f" len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)} ")
#if image_to_image:
# descriptions.append(f"`PROMPT`: {prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale} `I2I_STR`: {image_to_image_strength}")
#else:
for i in range(IMAGE_COUNT):
# Call to your generate_for_discord function
current_seed = seed + i # if (not image_to_image) and (image_to_image_prompt == "") else seed
#image_path, image_to_image_path, used_seed, steps, prompt, i2i_seed = generate_for_discord(
image_path, used_seed, steps, final_prompt, control_net_image_path, mask_layer_image_path = generate_for_discord(
prompt,
width_value,
height_value,
guidance,
steps,
refine,
scale,
current_seed,
negative_prompt,
#image_to_image,
#image_to_image_strength,
auto,
#image_to_image_prompt
#quality_terms,
#lighting_terms,
#media_terms,
#random_real_artists,
#style_terms,
random_terms,
#controlnet_type,
init_image_path, # Pass the path of the downloaded initial image
target_mode,
target_strength,
)
if init_image is not None and len(init_image.filename) > 0 and i==0:
## resize mask
# Save the mask with a unique name
if not init_image_hidden:
mask_unique_name = str(uuid.uuid4()) + '.png'
init_image.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_unique_name)))
mask_scale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_mask_scale_'+mask_unique_name)
RESIZE_SCALE = .25
w = init_image.width
h = init_image.height
new_w = int(w * RESIZE_SCALE)
new_h = int(h * RESIZE_SCALE)
processing_img = Image.open(init_image.filename)
mask_processed_img = processing_img.resize((new_w, new_h))
mask_processed_img.save(mask_scale_name)
org_init_image_size = os.path.getsize(mask_scale_name) / (1024 * 1024) # Convert size to MB
if total_size + org_init_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += org_init_image_size
#descriptions.append(f"`NET_SEED`: {seed} `NET`: {controlnet_type} `FILESIZE`: {str(org_init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(mask_scale_name, description=descriptions[len(descriptions)-1]))
if not scale < 1:
#if init_image_path is None:
image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,image_path))) / (1024 * 1024) # Convert size to MB
# Check if adding the next image will exceed the total size limit
if total_size + image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += image_size
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB ")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,image_path)), description=descriptions[len(descriptions)-1]))
#if init_image_path is not None:
if mask_layer_image_path is not None and i==0:
mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path))) / (1024 * 1024) # Convert size to MB
if total_size + mask_layer_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += mask_layer_image_size
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
#if control_net_image_path is not None:
#init_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,control_net_image_path))) / (1024 * 1024) # Convert size to MB
#if total_size + init_image_size > max_size_mb:
# break # Break the loop if the next image would exceed the limit
#total_size += init_image_size
#descriptions.append(f"`NET_SEED`: {seed} `NET`{controlnet_type} `FILESIZE`: {str(init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
#descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
#images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,control_net_image_path)), description=descriptions[len(descriptions)-1]))
# #images.append(discord.File(image_path, description=descriptions[len(descriptions)-1]))
# control_net_image_path
#image_to_image
#if image_to_image:
# image_to_image_size = os.path.getsize(image_to_image_path) / (1024 * 1024) # Convert size to MB
# if total_size + image_to_image_size > max_size_mb:
# break # Break the loop if the next image would exceed the limit
# total_size += image_to_image_size
# descriptions.append(f"`I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_to_image_size)[:4]}MB")
# images.append(discord.File(image_to_image_path, description=descriptions[len(descriptions)-1]))
# #images.append(discord.File(image_path, description=descriptions[len(descriptions)-1]))
upscale_control_net_image_size = None
upscale_image_size = None
if scale != 1:
if image_path is not None:
upscale_image_path = f"C:\\Users\\new\\dev\\bot\\{DEFAULT_IMAGES_FOLDER_PREFIX}discord_img_gen_upscale_{image_path}"
if init_image_path is not None and len(init_image.filename) > 0:
upscale_control_net_image_path = f"C:\\Users\\new\\dev\\bot\\{DEFAULT_IMAGES_FOLDER_PREFIX}discord_img_gen_upscale_{control_net_image_path}"
if image_path is not None:
upscale_image_size = os.path.getsize(upscale_image_path) / (1024 * 1024)
if init_image_path is not None and len(init_image.filename) > 0:
upscale_control_net_image_size = os.path.getsize(upscale_control_net_image_path) / (1024 * 1024)
if init_image_path is None:
if upscale_control_net_image_size is None and org_init_image_size > 0:
if total_size + upscale_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
#descriptions.append(f"`SEED`: {used_seed} `I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB ")
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(upscale_image_path, description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
if upscale_control_net_image_size is None and org_init_image_size == 0:
if total_size + upscale_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
#descriptions.append(f"`SEED`: {used_seed} `I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB ")
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(upscale_image_path, description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
# control net
if init_image_path is not None and len(init_image.filename) > 0:
if mask_layer_image_path is not None and i==0:
mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path))) / (1024 * 1024) # Convert size to MB
if total_size + mask_layer_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += mask_layer_image_size
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
if total_size + upscale_control_net_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
descriptions.append(
f"`SEED`: {used_seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,upscale_control_net_image_path)), description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_control_net_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
total_end_time = time.time() # End timing
total_processing_time = total_end_time - process_start_time
descriptions.append(f"`PROMPT`: {final_prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale}\n`TOTAL_TIME` {str(total_processing_time)[:4]}sec\n")
if init_image_path is not None:
# append in place so the extra details are actually included in the message
descriptions[-1] += f"`CAPTION_MODEL` {caption_model} `CLIP_MODEL` {clip_model} `CLIP_MODE` {clip_mode} `CAPTION_TYPE` {captioning_type} \n`TIME` {str(captioning_processing_time)[:4]}sec"
if target_mode:
descriptions[-1] += f"`TRGT_MODE` {target_mode} `TRGT_STR` {target_strength} `TRGT_PRMPT` {target_prompt} `NEG_TRGT_PRMPT` {negative_target_prompt}"
#if auto:
# await ctx.followup.send( str("\n".join(descriptions))+f"\nExtn: {final_prompt}", files=images)
#else:
await ctx.followup.send("\n".join(descriptions), files=images)
# Run the bot with your token
if __name__ == "__main__":
try:
client.run(os.getenv("DISCORD_BOT_TOKEN"))
except Exception as e:
print(e)