@twobob
Created November 29, 2023 17:48
SSD Segmind bot with BLIP
#!/usr/bin/env python
import os
import random
import uuid
import subprocess
from typing import List, Optional
from tqdm import tqdm
import re
import time
### optional automated install
'''
def check_and_install(lib_name_mapping):
for lib, runtime_name in lib_name_mapping.items():
try:
__import__(runtime_name)
print(f'{runtime_name} is already installed.')
except ImportError:
print(f'Installing {runtime_name}...')
install_cmd = ['pip', 'install', runtime_name]
subprocess.run(install_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f'{runtime_name} has been installed.')
# Library name mapping
lib_name_mapping = {
'discord': 'discord',
'transformers': 'transformers',
'aiohttp': 'aiohttp',
'numpy': 'numpy',
'PIL': 'Pillow',
'torch': 'torch',
'diffusers': 'diffusers',
'controlnet_aux': 'controlnet_aux',
'dotenv': 'python-dotenv',
'compel': 'compel',
'gradio': 'gradio',
'clip_interrogator': 'clip-interrogator',
'cv2': 'opencv-contrib-python',
}
check_and_install(lib_name_mapping)
'''
## clip-interrogator-0.6.0 huggingface-hub-0.19.4 open_clip_torch-2.23.0 tokenizers-0.15.0 protobuf-4.25.1
try:
import cv2
except ImportError:
print('Issue importing the cv2 module. Please install `pip install opencv-contrib-python`')
try:
import discord
from discord import app_commands
except ImportError:
print('Issue importing the discord module. Please install `pip install discord`')
try:
from transformers import pipeline , CLIPTokenizer, AutoImageProcessor, UperNetForSemanticSegmentation #, AutoModelForCausalLM , pipeline
except ImportError:
print('Issue importing the transformers module. Please install `pip install transformers`')
try:
import aiohttp
except ImportError:
print('Issue importing the aiohttp module. Please install `pip install aiohttp`')
try:
import numpy as np
except ImportError:
print('Issue importing the numpy module. Please install `pip install numpy`')
try:
from PIL import Image, ImageDraw, ImageFont
except ImportError:
print('Issue importing the PIL module. Please install `pip install Pillow`')
try:
import torch
from torch import autocast
except ImportError:
print('Issue importing the torch module. Please install `pip install torch`')
try:
from diffusers import LCMScheduler, AutoPipelineForText2Image, AutoencoderKL, StableDiffusionControlNetPipeline, StableDiffusionXLPipeline, DiffusionPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
except ImportError:
print('Issue importing the diffusers module. Please install `pip install diffusers`')
try:
from controlnet_aux import OpenposeDetector
except ImportError:
print('Issue importing the controlnet_aux module. Please install `pip install controlnet_aux`')
try:
from dotenv import load_dotenv
except ImportError:
print('Issue importing the dotenv module. Please install `pip install python-dotenv`')
try:
from compel import Compel, ReturnedEmbeddingsType
except ImportError:
print('Issue importing the compel module. Please install `pip install git+https://github.com/damian0815/compel/`')
try:
#import gradio as gr
from clip_interrogator import Config, Interrogator
except ImportError:
print('Issue importing the clip_interrogator module. Please install `pip install open-clip-torch clip-interrogator`')
if not os.path.isfile('./realesrgan-ncnn-vulkan.exe'):
print('Cannot find `./realesrgan-ncnn-vulkan.exe` in the current working directory')
# create env file if it does not exist with default values
# Read from it afterwards
#
if not os.path.isfile('.env'):
with open(".env", "w") as f:
f.write("DISCORD_BOT_TOKEN=\n")
f.write("CACHE_EXAMPLES=1\n")
f.write("MAX_IMAGE_SIZE=1024\n")
f.write("USE_TORCH_COMPILE=1\n")
f.write("ENABLE_CPU_OFFLOAD=0\n")
f.write("ENABLE_REFINER=0")
# Load settings from the .env file
load_dotenv()
from lists import get_random_terms, seg_palette, get_random_mix
caption_model_name = 'blip-large' #@param ["blip-base", "blip-large", "git-large-coco"]
clip_model_name = 'ViT-L-14/openai' #@param ["ViT-L-14/openai", "ViT-H-14/laion2b_s32b_b79k"]
ci = None
def image_analysis(imagepath):
image = load_image(imagepath)
image = image.convert('RGB')
image_features = ci.image_to_features(image)
top_mediums = ci.mediums.rank(image_features, 5)
top_artists = ci.artists.rank(image_features, 5)
top_movements = ci.movements.rank(image_features, 5)
top_trendings = ci.trendings.rank(image_features, 5)
top_flavors = ci.flavors.rank(image_features, 5)
medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
movement_ranks = {movement: sim for movement, sim in zip(top_movements, ci.similarities(image_features, top_movements))}
trending_ranks = {trending: sim for trending, sim in zip(top_trendings, ci.similarities(image_features, top_trendings))}
flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks
def image_to_prompt(imagepath, mode):
ci.config.chunk_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
image = load_image(imagepath)
image = image.convert('RGB')
if mode == 'best':
return ci.interrogate(image)
elif mode == 'classic':
return ci.interrogate_classic(image)
elif mode == 'fast':
return ci.interrogate_fast(image)
elif mode == 'negative':
return ci.interrogate_negative(image)
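# Illustrative usage (assuming `ci` has already been initialised with an Interrogator config):
#   image_to_prompt("input.png", "fast")      -> a caption-style prompt string for the image
#   image_to_prompt("input.png", "negative")  -> terms CLIP associates weakly with the image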
# Discord Bot Setup
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
tree = discord.app_commands.CommandTree(client)
# Environment Setup and Model Initialization
MAX_SEED = np.iinfo(np.int32).max
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "1") == "1"
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "1") == "1"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
ENABLE_REFINER = os.getenv("ENABLE_REFINER", "0") == "1"
DEFAULT_SCALE = 1
IMAGE_COUNT = 1
MAX_STEPS = 100
unique_name = "oops.png"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
### Model query defaults
GUIDANCE_DEFAULT = 1.0
STEPS_DEFAULT = 14
WIDTH_DEFAULT = 800
HEIGHT_DEFAULT = 1280
IMAGES_COUNT_DEFAULT = 6
TARGET_STRENGTH_DEFAULT = 0.5
NEGATIVE_PROMPT_DEFAULT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
DEFAULT_FONT_LOCATION = "C:\Windows\WinSxS\amd64_microsoft-windows-font-truetype-arial_31bf3856ad364e35_10.0.22621.1_none_d4193be3a119442b\arial.ttf" # Path to a .ttf font file
DEFAULT_IMAGES_FOLDER_PREFIX = ".\\image\\"
CAPTION_MODEL_DEFAULT= "blip-base", # default value for caption model
CLIP_MODEL_DEFAULT = "ViT-L-14/openai", # default value for clip model
CAPTION_TYPE_DEFAULT= "image_to_prompt" # default value for captioning type
###
vae = None
pipe = None
refiner = None
control_net_image_path = None
upscale_image_size = None
target_conditioning = None
target_pooled = None
target_prompt = ""
negative_target_prompt = ""
negative_target_conditioning = None
negative_target_pooled = None
#negative_prompt_two = ""
org_init_image_size = 0
adapter_id = "latent-consistency/lcm-lora-ssd-1b"
if torch.cuda.is_available():
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = AutoPipelineForText2Image.from_pretrained(
#pipe = StableDiffusionXLPipeline.from_pretrained(
#"stabilityai/stable-diffusion-xl-base-1.0",
"segmind/SSD-1B",
vae=vae,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
if ENABLE_REFINER:
refiner = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
vae=vae,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
if ENABLE_CPU_OFFLOAD:
pipe.enable_model_cpu_offload()
if refiner is not None:
refiner.enable_model_cpu_offload()
else:
pipe.to(device)
if refiner is not None:
refiner.to(device)
pipe.load_lora_weights(adapter_id)
pipe.fuse_lora()
if USE_TORCH_COMPILE and not os.name == 'nt': # skip torch.compile on Windows, where it is not supported
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
if refiner is not None:
refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
compel = Compel(
tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,
text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
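# Compel turns a (optionally weighted) prompt string into the embedding tensors the SDXL-style
# pipeline expects. Rough sketch of how it is used further down (the "++" up-weighting is illustrative):
#   conditioning, pooled = compel("a portrait of a knight++ in fog")
#   image = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, ...).images[0]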
# Image To image Pipeline
model_id_or_path = "segmind/SSD-1B"
#pipe_image_to_image = StableDiffusionXLImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
#pipe_image_to_image = pipe.to('cuda')
#init_image = Image.open("wub.png").convert("RGB").resize((768, 512))
#prompt = "A fantasy landscape, trending on artstation"
#images = pipe_image_to_image(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
#images[0].save("fantasy_landscape.png")
# AUTO PROMPT GENERATION
#sd_tokenizer = AutoTokenizer.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
#sd_model = AutoModelForCausalLM.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
sd_pipeline = pipeline('text-generation', model='Gustavosta/MagicPrompt-Stable-Diffusion', max_length=128, pad_token_id=0)
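# MagicPrompt is a GPT-2 model fine-tuned to extend Stable Diffusion prompts; it is used in the
# `auto` branch below, roughly like:
#   sd_pipeline("a castle on a hill,", num_return_sequences=1)[0]["generated_text"]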
def save_image(img, add_watermark=True):
global DEFAULT_SCALE
# Load a font
if add_watermark:
font_path = DEFAULT_FONT_LOCATION
try:
font = ImageFont.truetype(font_path, 30) # Adjust the size to fit your needs
except IOError:
font = ImageFont.load_default()
# Add watermark
watermark_text = "AI Enthusiasts"
draw = ImageDraw.Draw(img)
# Calculate the bounding box at (0, 0) position
bbox = draw.textbbox((0, 0), watermark_text, font=font)
# Calculate text width and height from bbox
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Calculate x, y for bottom right position
x = img.width - text_width - 20 # 20 pixels from the right
y = img.height - text_height - 20 # 20 pixels from the bottom
# Draw the text
draw.text((x, y), watermark_text, font=font, fill=(127, 127, 128))
# Save the image with a unique name
unique_name = str(uuid.uuid4()) + '.png'
img.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)))
upscale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_upscale_'+unique_name)
if DEFAULT_SCALE != 1:
if img.width * img.height > 819200 and DEFAULT_SCALE > 3:
DEFAULT_SCALE = 3
if img.width * img.height > 1310720 and DEFAULT_SCALE > 2:
DEFAULT_SCALE = 2
if DEFAULT_SCALE < 1:
w, h = img.size
new_w = int(w * DEFAULT_SCALE)
new_h = int(h * DEFAULT_SCALE)
processed_img = img.resize((new_w, new_h))
processed_img.save(upscale_name)
else:
# Run the executable
subprocess.run(['./realesrgan-ncnn-vulkan.exe', '-i', str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)), '-o', upscale_name, '-s', str(DEFAULT_SCALE)], check=True)
# Load the processed image
processed_img = Image.open(upscale_name)
return unique_name
def randomize_seed_fn(seed: int) -> int:
seed = random.randint(0, MAX_SEED)
return seed
def is_empty_string(s):
return s is None or not s.strip()
# Function to generate images
# The complete generate_for_discord function
def generate_for_discord(
prompt: str,
width: int = WIDTH_DEFAULT,
height: int = HEIGHT_DEFAULT,
guidance_scale: float = GUIDANCE_DEFAULT,
num_inference_steps: int = STEPS_DEFAULT,
apply_refiner: bool = True,
scale: float = 1.0,
seed: int = 0,
#negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typopgraphy, signature, signed"
negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
#image_to_image: bool = False,
#image_to_image_strength: float = 0.3,
auto :bool = False,
#image_to_image_prompt: str = ""
#quality_terms :int = 0,
#lighting_terms: int = 0,
#media_terms : int =0,
#random_real_artists :int =0,
#style_terms: int=0,
random_terms: bool=False,
#controlnet_type: str="Depth",
init_image_path: str = None,
#caption_model: str = CAPTION_MODEL_DEFAULT,# "blip-base", # default value for caption model
#clip_model: str = CLIP_MODEL_DEFAULT,# "ViT-L-14/openai", # default value for clip model
#captioning_type: str = CAPTION_TYPE_DEFAULT,# "image_analysis" # default value for captioning type
target_mode: bool=False,
target_strength: float = TARGET_STRENGTH_DEFAULT,
):
global IMAGE_COUNT
init_image = None
control_net_image_path = None
latents = None
image = None
image_path = None
generator = torch.Generator().manual_seed(seed)
if num_inference_steps > MAX_STEPS // IMAGE_COUNT :
num_inference_steps = MAX_STEPS // IMAGE_COUNT
target_strength = max(0, min(target_strength, 1))
conditioning, pooled = compel(prompt)
negative_conditioning, negative_pooled = compel(negative_prompt)
if init_image_path is not None and target_mode:
conditioning = conditioning - ((conditioning - target_conditioning) * target_strength )
pooled = pooled - ((pooled - target_pooled)* target_strength )
negative_conditioning = negative_conditioning - ((negative_conditioning - negative_target_conditioning) * target_strength )
negative_pooled = negative_pooled - ((negative_pooled - negative_target_pooled) * target_strength)
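# Target mode nudges the prompt embeddings towards the embeddings of the caption extracted from the
# init image. Each line above is a plain linear interpolation:
#   new = a - (a - target) * strength  ==  a * (1 - strength) + target * strength
# so strength 0 keeps the typed prompt unchanged and strength 1 uses the captioned target entirely.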
#if image_to_image_prompt == "":
# image_to_image_prompt = prompt
# add support for second prompt embeddings
#i2i_conditioning, i2i_pooled = compel(image_to_image_prompt)
#image_to_image_path = "not_set"
#if init_image_path is None:
if not apply_refiner or refiner is None:
with torch.autocast("cuda"):
image = pipe(
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
seed=seed,
#negative_prompt=negative_prompt,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
width=width,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator
).images[0]
else:
with torch.autocast("cuda"):
latents = pipe( # output_type="latent" below makes the base pipeline return latents for the refiner
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
width=width,
seed=seed,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
#negative_prompt=negative_prompt,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator,
output_type="latent"
).images
image = refiner(
#prompt=prompt,
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
seed=seed,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
#negative_prompt=negative_prompt,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
latents=latents,
generator=generator,
).images[0]
image_path = save_image(image)
mask_layer_image_path=None
print(("image_path", image_path, "seed", seed) if init_image_path is not None else ("control_net_image_path", control_net_image_path, "seed", seed))
#print ("image_to_image_path", image_to_image_path, "seed", seed, "current seed", i2i_seed)
#return image_path, image_to_image_path, seed, num_inference_steps, prompt, i2i_seed
return image_path, seed, num_inference_steps, prompt, control_net_image_path, mask_layer_image_path
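# NOTE: everything between the triple quotes below is disabled ControlNet handling
# (Canny, Segmentation, Depth, Normal and OpenPose paths) kept for reference; none of it runs.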
# canny
'''
if init_image_path is not None and controlnet_type == "Canny":
model_id_or_path = "segmind/SSD-1B"
# load the controlnet model for canny edge detection
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
#controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
# load the stable diffusion pipeline with controlnet
#controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
#controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, torch_dtype=torch.float16)
#pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
#controlnet_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
controlnet_pipe.scheduler = LCMScheduler.from_config(controlnet_pipe.scheduler.config)
# load LCM-LoRA
controlnet_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# enable efficient implementations using xformers for faster inference
controlnet_pipe.enable_xformers_memory_efficient_attention()
controlnet_pipe.enable_model_cpu_offload()
#### DO
image_input = load_image(init_image_path) # Assuming load_image is defined
image_input = np.array(image_input)
# Define parameters for canny edge detection
low_threshold = 100
high_threshold = 200
# Do canny edge detection
image_canny = cv2.Canny(image_input, low_threshold, high_threshold)
image_canny = image_canny[:, :, None]
image_canny = np.concatenate([image_canny, image_canny, image_canny], axis=2)
image_canny = Image.fromarray(image_canny)
mask_layer_image_path = save_image(image_canny, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
# Function call with dynamic arguments
image_output = controlnet_pipe(prompt= prompt,image=image_canny, **call_args).images[0]
control_net_image_path = save_image(image_output)
# add second timeline handling where an alternate prompt was entered and we have recompute our compel embeddings and NOT increment the seed.
# seg map
if init_image_path is not None and controlnet_type == "Segmentation":
### SEGEMENTATION
# load the image processor and the model for doing segmentation
image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
# load the controlnet model for semantic segmentation
seg_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-seg", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
seg_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
)
#seg_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
seg_pipe.scheduler = LCMScheduler.from_config(seg_pipe.scheduler.config)
# load LCM-LoRA
seg_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
seg_pipe.enable_xformers_memory_efficient_attention()
seg_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path) # Assuming load_image is defined
image_input = np.array(image_input)
# get the pixel values
pixel_values = image_processor(image_input, return_tensors="pt").pixel_values
# do semantic segmentation
with torch.no_grad():
outputs = image_segmentor(pixel_values)
print(image_input.size)
# post process the semantic segmentation
seg = image_processor.post_process_semantic_segmentation(outputs,target_sizes=[(width, height)])[0]
# Assuming 'known_height' is the height you want to use
#seg = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[(image_input.size, known_height)])[0]``
# add colors to the different identified classes
color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
for label, color in enumerate(seg_palette):
color_seg[seg == label, :] = color
# convert into PIL image format
color_seg = color_seg.astype(np.uint8)
image_seg = Image.fromarray(color_seg)
mask_layer_image_path = save_image(image_seg, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = seg_pipe(prompt, image_seg, **call_args).images[0]
control_net_image_path = save_image(image_output)
# DEPTH
if init_image_path is not None and controlnet_type == "Depth":
### DEPTH
# load the depth estimator model
depth_estimator = pipeline('depth-estimation')
# load the controlnet model for depth estimation
depth_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
depth_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=depth_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#depth_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
depth_pipe.scheduler = LCMScheduler.from_config(depth_pipe.scheduler.config)
# load LCM-LoRA
depth_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# enable efficient implementations using xformers for faster inference
depth_pipe.enable_xformers_memory_efficient_attention()
depth_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path)
image_input = image_input
# get depth estimates
image_depth = depth_estimator(image_input)['depth']
# convert to PIL image format
image_depth = np.array(image_depth)
image_depth = image_depth[:, :, None]
image_depth = np.concatenate([image_depth, image_depth, image_depth], axis=2)
image_depth = Image.fromarray(image_depth)
mask_layer_image_path = save_image(image_depth, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = depth_pipe(prompt, image_depth, **call_args).images[0]
control_net_image_path = save_image(image_output)
## Normals
if init_image_path is not None and controlnet_type == "Normal":
### NORMALS
# load the Dense Prediction Transformer (DPT) model for getting normal maps
normal_depth_estimator = pipeline("depth-estimation", model ="Intel/dpt-hybrid-midas")
# load the controlnet model for normal maps
normal_controlnet = ControlNetModel.from_pretrained(
"fusing/stable-diffusion-v1-5-controlnet-normal", torch_dtype=torch.float16
)
# load the stable diffusion pipeline with controlnet
normal_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=normal_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#normal_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# set scheduler
normal_pipe.scheduler = LCMScheduler.from_config(normal_pipe.scheduler.config)
# load LCM-LoRA
normal_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
normal_pipe.enable_xformers_memory_efficient_attention()
normal_pipe.enable_model_cpu_offload()
### DO
#image_input = load_image(init_image_path)
#image_input = np.array(image_input)
# do all the preprocessing to get the normal image
image = normal_depth_estimator(init_image_path)['predicted_depth'][0]
image = image.numpy()
image_depth = image.copy()
image_depth -= np.min(image_depth)
image_depth /= np.max(image_depth)
bg_threshold = 0.4
x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
x[image_depth < bg_threshold] = 0
y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
y[image_depth < bg_threshold] = 0
z = np.ones_like(x) * np.pi * 2.0
image = np.stack([x, y, z], axis=2)
image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
image_normal = Image.fromarray(image)
mask_layer_image_path = save_image(image_normal, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = normal_pipe(prompt, image_normal, **call_args).images[0]
control_net_image_path = save_image(image_output)
## OPENPOSE
if init_image_path is not None and controlnet_type == "OpenPose":
### OPEN POSE
# load the openpose model
#openpose = controlnet_aux.OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
# load the controlnet for openpose
openpose_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
)
# define stable diffusion pipeline with controlnet
openpose_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=openpose_controlnet, safety_checker=None, torch_dtype=torch.float16
)
#openpose_pipe.scheduler = UniPCMultistepScheduler.from_config(openpose_pipe.scheduler.config)
# set scheduler
openpose_pipe.scheduler = LCMScheduler.from_config(openpose_pipe.scheduler.config)
# load LCM-LoRA
openpose_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
openpose_pipe.enable_xformers_memory_efficient_attention()
openpose_pipe.enable_model_cpu_offload()
### DO
image_input = load_image(init_image_path)
image_input = np.array(image_input)
image_pose = openpose(image_input)
mask_layer_image_path = save_image(image_pose, False)
# Prepare arguments for the function call
call_args = {
"num_inference_steps": num_inference_steps,
"height": height,
"width": width,
"negative_prompt": negative_prompt,
"guidance_scale": guidance_scale,
"generator": generator
}
# Include 'latents' only if it's not None
if latents is not None:
call_args["latents"] = latents
image_output = openpose_pipe(prompt, image_pose, **call_args).images[0]
control_net_image_path = save_image(image_output)
#i2i_seed = seed
#if (image_to_image):
# if (image_to_image_prompt == ""):
# i2i_seed = i2i_seed + 1
#process_img = False
# the case where we pass both
#if not is_empty_string(init_image_path) and image_to_image:
# init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# the case where we pass only image_to_image
#elif image_to_image and is_empty_string(init_image_path) :
# init_image = Image.open(image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# the case where we pass only init_image_path
#elif init_image_path and not image_to_image :
# init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
# print(f"initial init_image_path {init_image_path}")
# process_img = True
# we now compute new compel embeddings and increment our seed by one conditionally
#if process_img:
# image = pipe_image_to_image(
# prompt=image_to_image_prompt,
# #prompt_embeds=i2i_conditioning,
# #pooled_prompt_embeds=i2i_pooled,
# image=init_image,
# strength=image_to_image_strength,
# seed=i2i_seed,
# negative_prompt=negative_prompt,
# width=width,
# height=height,
# guidance_scale=guidance_scale,
# num_inference_steps=num_inference_steps,
# generator=generator
# ).images[0]
# image_to_image_path = save_image(image)
#if init_image_path and image_to_image:
# image_to_image_path = save_image(init_image)
#if not image_to_image:
# image_to_image_path = None
'''
# Discord Bot Commands
@client.event
async def on_ready():
print(f'Logged in as {client.user}')
await tree.sync()
dimension_choices = [
app_commands.Choice(name="512", value=512),
app_commands.Choice(name="600", value=600),
app_commands.Choice(name="720", value=720),
app_commands.Choice(name="768", value=768),
app_commands.Choice(name="800", value=800),
app_commands.Choice(name="1024", value=1024),
app_commands.Choice(name="1280", value=1280),
app_commands.Choice(name="1440", value=1440),
app_commands.Choice(name="1600", value=1600),
app_commands.Choice(name="1920", value=1920),
app_commands.Choice(name="2048", value=2048),
]
img_choices = [
app_commands.Choice(name="6", value=6),
app_commands.Choice(name="5", value=5),
app_commands.Choice(name="4", value=4),
app_commands.Choice(name="3", value=3),
app_commands.Choice(name="2 (good for scale 2)", value=2),
app_commands.Choice(name="1 (good for scale 3 or 4)", value=1),
]
controlnet_choices =[
app_commands.Choice(name="Canny", value="Canny"),
app_commands.Choice(name="Depth", value="Depth"),
app_commands.Choice(name="Normal", value="Normal"),
app_commands.Choice(name="Segmentation", value="Segmentation"),
app_commands.Choice(name="OpenPose", value="OpenPose"),
]
scale_choices = [
app_commands.Choice(name="1 no upscale (default)", value=1.0),
app_commands.Choice(name="2 (1280x1024 max)", value=2.0),
app_commands.Choice(name="3 (800x800 max)", value=3.0),
app_commands.Choice(name="4 (600x600 max)", value=4.0),
app_commands.Choice(name="90%", value=0.90),
app_commands.Choice(name="75%", value=0.75),
app_commands.Choice(name="66%", value=0.66),
app_commands.Choice(name="50%", value=0.5),
app_commands.Choice(name="33%", value=0.3),
]
caption_model_choices = [
app_commands.Choice(name="blip-base", value="blip-base"),
app_commands.Choice(name="blip-large", value="blip-large"),
app_commands.Choice(name="git-large-coco", value="git-large-coco"),
]
clip_model_choices = [
app_commands.Choice(name="ViT-L-14/openai", value="ViT-L-14/openai"),
app_commands.Choice(name="ViT-H-14/laion2b_s32b_b79k", value="ViT-H-14/laion2b_s32b_b79k"),
]
clip_mode_choices = [
app_commands.Choice(name='best', value= 'best' ),
app_commands.Choice(name='fast', value= 'fast' ),
app_commands.Choice(name='classic', value= 'classic' ),
app_commands.Choice(name='negative', value= 'negative'),
]
captioning_type_choices = [
app_commands.Choice(name="image_analysis", value="image_analysis"),
app_commands.Choice(name="image_to_prompt", value="image_to_prompt"),
app_commands.Choice(name="image_to_prompt and analysis", value="both"),
]
# Utility function to safely extract the integer value
def get_int_value(choice_or_int):
return choice_or_int.value if hasattr(choice_or_int, 'value') else choice_or_int
async def download_image_attachment(attachment: discord.Attachment) -> str:
# Specify the directory where you want to save the image
save_directory = "./"
os.makedirs(save_directory, exist_ok=True)
# Construct the full path where the image will be saved
file_path = os.path.join(save_directory, attachment.filename)
# Download the image
async with aiohttp.ClientSession() as session:
async with session.get(attachment.url) as response:
if response.status == 200:
# Write the image to a file
with open(file_path, 'wb') as f:
f.write(await response.read())
else:
raise Exception(f"Failed to download image: HTTP {response.status}")
return file_path
@tree.command(name="img", description="Generate an image based on a prompt")
@app_commands.describe(prompt="The description for the image to generate")
@app_commands.choices(
width=dimension_choices,
height=dimension_choices,
scale=scale_choices,
img_count=img_choices,
caption_model=caption_model_choices,
clip_model=clip_model_choices,
captioning_type=captioning_type_choices,
clip_mode=clip_mode_choices,
#controlnet_type=controlnet_choices
)
@app_commands.describe(
prompt="The description for the image to generate",
seed="The seed for the image generation (random default)",
width="The width of the image (1024 default)",
height="The height of the image (1024 default)",
guidance=f"The guidance scale for image generation ({GUIDANCE_DEFAULT} default)",
steps=f"The number of inference steps ({STEPS_DEFAULT} default)",
refine="Whether to apply the refiner (True default)",
scale="upscale the image (1x default)" ,
img_count=f"how many in the batch? ({IMAGES_COUNT_DEFAULT} default)",
negative_prompt="add terms you dont want to see (optional)",
#image_to_image="Whether to Image to Image (False default)",
#image_to_image_strength="The strength of the Image to Image transfer (0.3 default)",
auto="Use prompt AI to extend prompt (False default)",
#image_to_image_prompt="The description for the image_to_image to generate",
#quality_terms="The number of quality terms to include in the image description (0 default)",
#lighting_terms="The number of lighting terms to include in the image description (0 default)",
#media_terms="The number of media terms to include in the image description (0 default)",
#random_real_artists="The number of random real artist names to include in the image description (0 default)",
#style_terms="The number of style terms to include in the image description (0 default)",
random_terms="Will include one random style, lighting quality and media terms with one real artist name (False default)",
#controlnet_type="The type of control network processing to apply (e.g., Canny, Depth)",
init_image="Initial image for Image to Image generation (optional)",
init_image_hidden="DO NOT SHOW init_image in outputs",
caption_model="Select the model for image captioning: 'blip-base', 'blip-large', or 'git-large-coco' (blip-large Default)",
clip_model="Choose the CLIP model for image analysis: 'ViT-L-14/openai' or 'ViT-H-14/laion2b_s32b_b79k' (ViT-L-14 Default)",
clip_mode="Choose the CLIP mode for image analysis: 'best', 'fast', 'classic', 'negative' (best Default)",
captioning_type="Type of captioning: 'image_analysis' for descriptive captions, 'image_to_prompt' for creative prompts",
target_mode="try to coerce one set of weights towards another",
target_strength=f"how much do we try to move towards the target weights 0 -> 1 ({TARGET_STRENGTH_DEFAULT} Default)"
)
async def img(
ctx: discord.Interaction,
prompt: str,
width: int = WIDTH_DEFAULT, # The parameter will be an int or a Choice object 1440
height: int = HEIGHT_DEFAULT, # The parameter will be an int or a Choice object 720
guidance: float = GUIDANCE_DEFAULT, # 1.0
steps: int = STEPS_DEFAULT, # 14
refine: bool = True,
scale: float = 1.0,
seed: int = 0,
img_count: int = IMAGES_COUNT_DEFAULT, # 6
#negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typopgraphy, signature, signed"
negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
#image_to_image: bool = False,
#image_to_image_strength: float = 0.3,
auto :bool = False,
#image_to_image_prompt: str = ""
#quality_terms :int = 0,
#lighting_terms: int = 0,
#media_terms : int =0,
#random_real_artists :int =0,
#style_terms: int=0,
random_terms: bool=False,
#controlnet_type: str = "Depth",
init_image: discord.Attachment = None, # New parameter for initial image
init_image_hidden:bool=False,
caption_model: str = "blip-large",#' CAPTION_MODEL_DEFAULT,# "blip-base", # default value for caption model
clip_model: str = "ViT-L-14/openai",# CLIP_MODEL_DEFAULT,# "ViT-L-14/openai", # default value for clip model
clip_mode: str = "best",# 'fast', 'classic', 'negative'
captioning_type: str = "image_to_prompt",#CAPTION_TYPE_DEFAULT,# "image_to_prompt" # default value for captioning type
target_mode: bool=False,
target_strength: float = TARGET_STRENGTH_DEFAULT,
):
global DEFAULT_SCALE
global IMAGE_COUNT
global org_init_image_size
global ci
global target_conditioning
global target_pooled
global negative_target_conditioning
global negative_target_pooled
global target_prompt
global negative_target_prompt
process_start_time = time.time() # Start timing
IMAGE_COUNT = get_int_value(img_count) # honour the requested batch size
DEFAULT_SCALE = scale
# Extract integer values safely
width_value = get_int_value(width)
height_value = get_int_value(height)
# Seed handling logic
seed = seed if (seed is not None) and (seed > 0) else randomize_seed_fn(seed)
await ctx.response.defer()
images = []
descriptions = []
captioning_processing_time = 0
print(f"original prompt {prompt}")
# ADD PROMPT REQUESTS
#value_list = {
# "quality": quality_terms,
# "lighting": lighting_terms,
# "media": media_terms,
# "real_artists": random_real_artists,
# "style": style_terms,
#}
# Building the prompt using list comprehension
#prompt +=" " + " ".join(get_random_terms(list_name, value_list[list_name]) for list_name in value_list.keys())
total_size = 0
max_size_mb = 7.999
# Handling the initial image if provided
init_image_path = None
if init_image is not None:
# Download the image attachment
init_image_path = await download_image_attachment(init_image)
if init_image_path is not None:
config = Config()
config.clip_model_name = clip_model
config.caption_model_name = caption_model
ci = Interrogator(config)
start_time = time.time() # Start timing
if not target_mode:
if captioning_type == "image_to_prompt" or captioning_type == "both":
prompt +=" " + image_to_prompt(init_image_path, clip_mode)
negative_prompt = image_to_prompt(init_image_path, 'negative')
if target_mode:
#target_prompt = ""
#negative_target_prompt = ""
if captioning_type == "image_to_prompt" or captioning_type == "both":
target_prompt = image_to_prompt(init_image_path, clip_mode)
negative_target_prompt = image_to_prompt(init_image_path, 'negative')
target_conditioning, target_pooled = compel(target_prompt)
negative_target_conditioning, negative_target_pooled = compel(negative_target_prompt)
if captioning_type == "image_analysis" or captioning_type == "both":
medium, artist, movement, trending, flavor = image_analysis(init_image_path)
# Iterate through each attribute list and add to the prompt
for rank_dict in [medium, artist, movement, trending, flavor]:
for attribute, similarity in rank_dict.items():
prompt += f'{attribute}, '
#prompt += f'{attribute} ({similarity:.2f}), '
# Remove the trailing comma and space from the prompt
prompt = prompt.strip(', ')
end_time = time.time() # End timing
captioning_processing_time = end_time - start_time
if random_terms:
prompt +=" " + get_random_mix()
# Load the CLIP tokenizer
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
# Tokenize the text
tokenized_prompt = tokenizer.encode(prompt)
# if init_image_path is not None:
# print(f"Extended prompt: {prompt}")
# print(f" len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)} ")
if auto:
while len(tokenized_prompt) < 70:
prompt = sd_pipeline(prompt+',', num_return_sequences=1)[0]["generated_text"]
# Replace multiple spaces, newlines, and commas with a single space or comma respectively
prompt = re.sub(r'\s+', ' ', prompt)
prompt = re.sub(r',+', ',', prompt).strip()
print(f"Extended prompt: {prompt}")
# Tokenize the text
tokenized_prompt = tokenizer.encode(prompt)
#if len(tokenized_prompt) > 77:
# print(f"trimming to 77 Tokens")
# Truncate the prompt if it exceeds 77 tokens
#while len(tokenized_prompt) > 77:
# Remove the last word
# prompt = ' '+' '.join(prompt.split(' ')[:-1])
# # Re-tokenize
# tokenized_prompt = tokenizer.encode(prompt)
# Replace multiple spaces and newlines with a single space
prompt = re.sub(r'\s+', ' ', prompt).strip()
if init_image_path is not None:
print(f"Extended prompt: {prompt}")
print(f" len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)} ")
#if image_to_image:
# descriptions.append(f"`PROMPT`: {prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale} `I2I_STR`: {image_to_image_strength}")
#else:
for i in range(IMAGE_COUNT):
# Call to your generate_for_discord function
current_seed = seed + i # if (not image_to_image) and (image_to_image_prompt == "") else seed
#image_path, image_to_image_path, used_seed, steps, prompt, i2i_seed = generate_for_discord(
image_path, used_seed, steps, final_prompt, control_net_image_path, mask_layer_image_path = generate_for_discord(
prompt,
width_value,
height_value,
guidance,
steps,
refine,
scale,
current_seed,
negative_prompt,
#image_to_image,
#image_to_image_strength,
auto,
#image_to_image_prompt
#quality_terms,
#lighting_terms,
#media_terms,
#random_real_artists,
#style_terms,
random_terms,
#controlnet_type,
init_image_path, # Pass the path of the downloaded initial image
target_mode,
target_strength,
)
if init_image is not None and len(init_image.filename) > 0 and i==0:
## resize mask
# Save the mask with a unique name
if not init_image_hidden:
mask_unique_name = str(uuid.uuid4()) + '.png'
init_image.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_unique_name)))
mask_scale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_mask_scale_'+mask_unique_name)
RESIZE_SCALE = .25
w = init_image.width
h = init_image.height
new_w = int(w * RESIZE_SCALE)
new_h = int(h * RESIZE_SCALE)
processing_img = Image.open(init_image.filename)
mask_processed_img = processing_img.resize((new_w, new_h))
mask_processed_img.save(mask_scale_name)
org_init_image_size = os.path.getsize(mask_scale_name) / (1024 * 1024) # Convert size to MB
if total_size + org_init_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += org_init_image_size
#descriptions.append(f"`NET_SEED`: {seed} `NET`: {controlnet_type} `FILESIZE`: {str(org_init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(mask_scale_name, description=descriptions[len(descriptions)-1]))
if not scale < 1:
#if init_image_path is None:
image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,image_path))) / (1024 * 1024) # Convert size to MB
# Check if adding the next image will exceed the total size limit
if total_size + image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += image_size
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB ")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,image_path)), description=descriptions[len(descriptions)-1]))
#if init_image_path is not None:
if mask_layer_image_path is not None and i==0:
mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path))) / (1024 * 1024) # Convert size to MB
if total_size + mask_layer_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += mask_layer_image_size
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
#if control_net_image_path is not None:
#init_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,control_net_image_path))) / (1024 * 1024) # Convert size to MB
#if total_size + init_image_size > max_size_mb:
# break # Break the loop if the next image would exceed the limit
#total_size += init_image_size
#descriptions.append(f"`NET_SEED`: {seed} `NET`{controlnet_type} `FILESIZE`: {str(init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
#descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(init_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
#images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,control_net_image_path)), description=descriptions[len(descriptions)-1]))
# #images.append(discord.File(image_path, description=descriptions[len(descriptions)-1]))
# control_net_image_path
#image_to_image
#if image_to_image:
# image_to_image_size = os.path.getsize(image_to_image_path) / (1024 * 1024) # Convert size to MB
# if total_size + image_to_image_size > max_size_mb:
# break # Break the loop if the next image would exceed the limit
# total_size += image_to_image_size
# descriptions.append(f"`I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_to_image_size)[:4]}MB")
# images.append(discord.File(image_to_image_path, description=descriptions[len(descriptions)-1]))
# #images.append(discord.File(image_path, description=descriptions[len(descriptions)-1]))
upscale_control_net_image_size = None
upscale_image_size = None
if scale != 1:
if image_path is not None:
upscale_image_path = f"C:\\Users\\new\\dev\\bot\\{DEFAULT_IMAGES_FOLDER_PREFIX}discord_img_gen_upscale_{image_path}"
if init_image_path is not None and len(init_image.filename) > 0:
upscale_control_net_image_path = f"C:\\Users\\new\\dev\\bot\\{DEFAULT_IMAGES_FOLDER_PREFIX}discord_img_gen_upscale_{control_net_image_path}"
if image_path is not None:
upscale_image_size = os.path.getsize(upscale_image_path) / (1024 * 1024)
if init_image_path is not None and len(init_image.filename) > 0:
upscale_control_net_image_size = os.path.getsize(upscale_control_net_image_path) / (1024 * 1024)
if init_image_path is None:
if upscale_control_net_image_size is None and org_init_image_size > 0:
if total_size + upscale_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
#descriptions.append(f"`SEED`: {used_seed} `I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB ")
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(upscale_image_path, description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
if upscale_control_net_image_size is None and org_init_image_size == 0:
if total_size + upscale_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
#descriptions.append(f"`SEED`: {used_seed} `I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB ")
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(upscale_image_path, description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
# control net
if init_image_path is not None and len(init_image.filename) > 0:
if mask_layer_image_path is not None and i==0:
mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path))) / (1024 * 1024) # Convert size to MB
if total_size + mask_layer_image_size > max_size_mb:
break # Break the loop if the next image would exceed the limit
total_size += mask_layer_image_size
descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
if total_size + upscale_control_net_image_size <= max_size_mb:
if scale <1:
descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
else:
descriptions.append(
f"`SEED`: {used_seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")#\n`PROMPT`: {final_prompt} ")
images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX,upscale_control_net_image_path)), description=descriptions[len(descriptions)-1]))
else:
print( f"{total_size} + {upscale_control_net_image_size} >= {max_size_mb}")
break # Break the loop if the next image would exceed the limit
total_end_time = time.time() # End timing
total_processing_time = total_end_time - process_start_time
descriptions.append(f"`PROMPT`: {final_prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale}\n`TOTAL_TIME` {str(total_processing_time)[:4]}sec\n")
if init_image_path is not None:
# append in place so the extra details are actually included in the message
descriptions[-1] += f"`CAPTION_MODEL` {caption_model} `CLIP_MODEL` {clip_model} `CLIP_MODE` {clip_mode} `CAPTION_TYPE` {captioning_type} \n`TIME` {str(captioning_processing_time)[:4]}sec"
if target_mode:
descriptions[-1] += f"`TRGT_MODE` {target_mode} `TRGT_STR` {target_strength} `TRGT_PRMPT` {target_prompt} `NEG_TRGT_PRMPT` {negative_target_prompt}"
#if auto:
# await ctx.followup.send( str("\n".join(descriptions))+f"\nExtn: {final_prompt}", files=images)
#else:
await ctx.followup.send("\n".join(descriptions), files=images)
# Run the bot with your token
if __name__ == "__main__":
try:
client.run(os.getenv("DISCORD_BOT_TOKEN"))
except Exception as e:
print(e)