scottjg/stable_diffusion_slackbot.py

## stable_diffusion_slackbot.py
# ---
# output-directory: "/tmp/stable-diffusion"
# ---
# # Stable diffusion slackbot
#
# This tutorial shows you how to build a Slackbot that uses
# [stable diffusion](https://stability.ai/blog/stable-diffusion-public-release)
# to produce realistic images from text prompts on demand.
#
# ![stable diffusion slackbot](./stable_diff_screenshot.jpg)

# ## Basic setup

import io
import os
from typing import Optional

import modal

# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
# the application. Let's give it a friendly name.

# The functions and the webhook defined below register themselves on this
# object through the `@stub.function` / `@stub.webhook` decorators.
stub = modal.Stub("example-stable-diff-bot")

# ## Inference Function
#
# ### HuggingFace token
#
# We're going to use the pre-trained
# [stable diffusion model](https://github.com/runwayml/stable-diffusion-v1-5) in
# HuggingFace's `diffusers` library. To gain access, you need to sign in to your
# HuggingFace account ([sign up here](https://huggingface.co/join)) and request
# access on the [model card page](https://huggingface.co/runwayml/stable-diffusion-v1-5).
#
# Next, [create a HuggingFace access token](https://huggingface.co/settings/tokens).
# To access the token in a Modal function, we can create a secret on the
# [secrets page](https://modal.com/secrets). Let's use the environment variable
# named `HUGGINGFACE_TOKEN`. Functions that inject this secret will have access
# to the environment variable.
#
# ![create a huggingface token](./huggingface_token.png)
#

# ## Model dependencies
#
# Your model will be running remotely inside a container. We will be installing
# all the model dependencies in the next step. We will also be "baking the model"
# into the image by running a Python function as a part of building the image.
# This lets us start containers much faster, since all the data that's needed is
# already inside the image.

# Identifier of the pretrained pipeline passed to `from_pretrained` at build time.
model_id = "stabilityai/stable-diffusion-2-1-base"
# Directory where `download_models` saves the scheduler and pipeline, and from
# which `StableDiffusion.__enter__` loads them again.
cache_path = "/vol/cache/sb21"


def download_models():
    """Download the scheduler config and the pipeline and save both under `cache_path`.

    Passed to `run_function` when the image is defined, so it needs the
    `HUGGINGFACE_TOKEN` environment variable to be set.

    Raises:
        KeyError: if `HUGGINGFACE_TOKEN` is not present in the environment.
    """
    # version: 1
    import diffusers
    import torch

    # Arguments shared by both downloads.
    hub_kwargs = {
        "use_auth_token": os.environ["HUGGINGFACE_TOKEN"],
        "cache_dir": cache_path,
    }

    # Scheduler configuration first. Experiment with different schedulers
    # to identify one that works best for your use-case.
    scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        model_id, subfolder="scheduler", **hub_kwargs
    )
    scheduler.save_pretrained(cache_path, safe_serialization=True)

    # Then everything else, requesting the half-precision ("fp16") revision.
    pipeline = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id, revision="fp16", torch_dtype=torch.float16, **hub_kwargs
    )
    pipeline.save_pretrained(cache_path, safe_serialization=True)


# Python packages installed into the container image.
_MODEL_PACKAGES = (
    "accelerate",
    "diffusers[torch]>=0.10",
    "ftfy",
    "torch",
    "torchvision",
    "transformers",
    "triton",
    "safetensors",
    "xformers==0.0.16rc393",
)

# Install the packages first, then run `download_models` as an image build step
# with the HuggingFace secret attached.
image = modal.Image.debian_slim().pip_install(*_MODEL_PACKAGES)
image = image.run_function(
    download_models,
    secrets=[modal.Secret.from_name("my-huggingface-secret")],
)
stub.image = image

# ## Using container lifecycle methods
#
# Modal lets you implement code that runs every time a container starts. This
# can be a huge optimization when you're calling a function multiple times,
# since Modal reuses the same containers when possible.
#
# The way to implement this is to turn the Modal function into a method on a
# class that also implements the Python context manager interface, meaning it
# has the `__enter__` method (the `__exit__` method is optional).
#
# We have also applied a few model optimizations to make the model run
# faster. On an A10G, the model takes about 6.5s to load into memory, and then
# 1.6s per generation on average. On a T4, it takes 13s to load and 3.7s per
# generation. Other optimizations are also available [here](https://huggingface.co/docs/diffusers/optimization/fp16#memory-and-speed).

# This is our Modal function. It runs the prompt through the `StableDiffusionPipeline`,
# converts each generated PIL image to PNG bytes, and returns them to the caller.
# When a Slack channel id is given, it also posts the images to that channel.


class StableDiffusion:
    """Stable diffusion pipeline that is loaded once in `__enter__` and reused.

    `__enter__` builds the pipeline from the files saved under `cache_path` and
    stores it on `self.pipe`; `run_inference` uses that pipeline for each call.
    """

    def __enter__(self):
        """Load the scheduler and pipeline from `cache_path` and move the pipeline to CUDA."""
        import diffusers
        import torch

        torch.backends.cuda.matmul.allow_tf32 = True

        scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
            cache_path,
            subfolder="scheduler",
            solver_order=2,
            prediction_type="epsilon",
            thresholding=False,
            algorithm_type="dpmsolver++",
            solver_type="midpoint",
            denoise_final=True,  # important if steps are <= 10
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(cache_path, scheduler=scheduler).to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()

    @stub.function(gpu="A10G")
    async def run_inference(
        self, prompt: str, channel_id: Optional[str] = None, steps: int = 100, batch_size: int = 4
    ) -> list[dict]:
        """Generate images for `prompt` and return them as PNG upload dicts.

        Args:
            prompt: Text prompt; it is repeated `batch_size` times in the pipeline call.
            channel_id: If truthy, the uploads are also passed to
                `post_image_to_slack` for this Slack channel.
            steps: Forwarded to the pipeline as `num_inference_steps`.
            batch_size: Number of copies of the prompt sent to the pipeline.

        Returns:
            One dict per generated image with the keys `content` (PNG bytes),
            `title` (the prompt) and `filename`.
        """
        import torch

        with torch.inference_mode(), torch.autocast("cuda"):
            images = self.pipe([prompt] * batch_size, num_inference_steps=steps, guidance_scale=7.0).images

        uploads = []
        # Convert each PIL image to a PNG byte array.
        for index, pil_image in enumerate(images):
            with io.BytesIO() as buf:
                pil_image.save(buf, format="PNG")
                uploads.append({
                    "content": buf.getvalue(),
                    "title": prompt,
                    # The index keeps the files of one batch distinguishable.
                    "filename": f"{prompt}-{index}.png",
                })

        if channel_id:
            # `post_image_to_slack` is implemented further below.
            post_image_to_slack.call(prompt, channel_id, uploads)

        return uploads


# @stub.function(
#     gpu="A10G",
#     image=(
#         modal.Image.debian_slim()
#         .pip_install(
#             "accelerate",
#             "diffusers[torch]>=0.10",
#             "ftfy",
#             "torch",
#             "torchvision",
#             "transformers",
#             "triton",
#             "safetensors",
#             "xformers==0.0.16rc393",
#         )

#     ),
#     shared_volumes={CACHE_PATH: volume},
#     secret=modal.Secret.from_name("my-huggingface-secret"),
# )
# async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
#     import torch
#     from diffusers import StableDiffusionPipeline

#     torch.backends.cuda.matmul.allow_tf32 = True

#     pipe = StableDiffusionPipeline.from_pretrained(
#         "stabilityai/stable-diffusion-2-1-base",
#         use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
#         revision="fp16",
#         torch_dtype=torch.float16,
#         cache_dir=CACHE_PATH,
#         device_map="auto",
#     ).to("cuda")
#     pipe.stub()

#     image = pipe(prompt, num_inference_steps=100).images[0]

#     # Convert PIL Image to PNG byte array.
#     with io.BytesIO() as buf:
#         image.save(buf, format="PNG")
#         img_bytes = buf.getvalue()

#     if channel_name:
#         # `post_image_to_slack` is implemented further below.
#         post_image_to_slack.call(prompt, channel_name, img_bytes)

#     return img_bytes


# ## Slack webhook
#
# Now that we wrote our function, we'd like to trigger it from Slack. We can do
# this with [slash commands](https://api.slack.com/interactivity/slash-commands)
# — a feature that lets you register prefixes (such as `/run-my-bot`) to
# trigger webhooks of your choice.
#
# To serve our model as a web endpoint, we apply the
# [`@stub.webhook`](/docs/guide/webhooks#webhook) decorator in place of
# `@stub.function`. Modal webhooks are [FastAPI](https://fastapi.tiangolo.com/)
# endpoints by default (though we accept any ASGI web framework). This webhook
# retrieves the form body passed from Slack.
#
# Instead of blocking on the result of the stable diffusion model (which could
# take some time), we want to notify the user immediately that their request
# is being processed. Modal Functions let you
# [`spawn`](/docs/reference/modal.Function#spawn) an input without waiting for
# the results, which we use here to kick off model inference as a background task.

from fastapi import Request

@stub.webhook(method="POST")
async def entrypoint(request: Request):
    """Handle the POSTed form: start inference for its `text` and reply right away.

    Reads `channel_id` and `text` from the form body; a missing field raises
    `KeyError`.
    """
    form = await request.form()
    channel_id, prompt = form["channel_id"], form["text"]

    # Inference is spawned rather than awaited, so the reply below is sent
    # without waiting for the images.
    StableDiffusion().run_inference.spawn(prompt, channel_id)

    return dict(
        text=f"Running stable diffusion for {prompt}.",
        response_type="in_channel",
    )


# ## Post to Slack
#
# Finally, let's define a function to post images to a Slack channel.
#
# First, we need to create a Slack app and store the token for our app as a
# Modal secret. To do so, visit the Modal [Secrets](/secrets) page and click
# "create a Slack secret". Then, you will find instructions on how to create a
# Slack app, give it OAuth permissions, and get a token. Note that you need to
# add the `files:write` OAuth scope to the created app.
#
# ![create a slack secret](./slack_secret.png)
#
# Below, we use the secret and `slack-sdk` to post to a Slack channel.


@stub.function(
    image=modal.Image.debian_slim().pip_install("slack-sdk"),
    secret=modal.Secret.from_name("my-slack-secret"),
)
def post_image_to_slack(title: str, channel_id: str, file_uploads: list[dict]):
    """Upload a batch of files to a Slack channel.

    Args:
        title: Currently unused by this function; each entry of `file_uploads`
            carries its own `title`.
        channel_id: Slack channel to upload to.
        file_uploads: Upload dicts, forwarded unchanged to
            `WebClient.files_upload_v2`.

    Raises:
        KeyError: if `SLACK_BOT_TOKEN` is not present in the environment.
    """
    import slack_sdk

    client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"])
    client.files_upload_v2(channel=channel_id, file_uploads=file_uploads)


# ## Deploy the Slackbot
#
# That's all the code we need! To deploy your application, run
#
# ```shell
# modal app deploy stable_diffusion_slackbot.py
# ```
#
# If successful, this will print a URL for your new webhook. To point your Slack
# app at it:
#
# - Go back to the [Slack apps page](https://api.slack.com/apps/).
# - Find your app and navigate to "Slash Commands" under "Features" in the left
#   sidebar.
# - Click on "Create New Command" and paste the webhook URL from Modal into the
#   "Request URL" field.
# - Name the command whatever you like, and hit "Save".
# - Reinstall the app to your workspace.
#
# We're done! 🎉 Install the app to any channel you're in, and you can trigger it
# with the command you chose above.
#
# ## Run Manually
#
# We can also trigger `StableDiffusion.run_inference` manually for easier debugging.

# Directory the generated PNG files are written to when run as a script.
OUTPUT_DIR = "/tmp/stable-diffusion"

if __name__ == "__main__":
    import sys

    # Use the first command-line argument as the prompt, if one was given.
    prompt = sys.argv[1] if len(sys.argv) > 1 else "oil painting of a shiba"

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with stub.run():
        # No channel id is passed, so `run_inference` does not post to Slack.
        # It returns one upload dict per image; the PNG bytes are under "content".
        uploads = StableDiffusion().run_inference.call(prompt)
        for index, upload in enumerate(uploads):
            output_path = os.path.join(OUTPUT_DIR, f"output_{index}.png")
            with open(output_path, "wb") as f:
                f.write(upload["content"])
            print(f"Wrote data to {output_path}")

# This code lets us call our script as follows:
#
# ```shell
# python stable_diffusion_slackbot.py "a photo of an astronaut riding a horse on mars"
# ```
#
# The generated output is written to the `/tmp/stable-diffusion` directory.
	# ---
	# output-directory: "/tmp/stable-diffusion"
	# ---
	# # Stable diffusion slackbot
	#
	# This tutorial shows you how to build a Slackbot that uses
	# [stable diffusion](https://stability.ai/blog/stable-diffusion-public-release)
	# to produce realistic images from text prompts on demand.
	#
	# ![stable diffusion slackbot](./stable_diff_screenshot.jpg)

	# ## Basic setup

	import io
	import os
	from typing import Optional

	import modal

	# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
	# the application. Let's give it a friendly name.

	# The functions and the webhook defined below register themselves on this
	# object through the `@stub.function` / `@stub.webhook` decorators.
	stub = modal.Stub("example-stable-diff-bot")

	# ## Inference Function
	#
	# ### HuggingFace token
	#
	# We're going to use the pre-trained
	# [stable diffusion model](https://github.com/runwayml/stable-diffusion-v1-5) in
	# HuggingFace's `diffusers` library. To gain access, you need to sign in to your
	# HuggingFace account ([sign up here](https://huggingface.co/join)) and request
	# access on the [model card page](https://huggingface.co/runwayml/stable-diffusion-v1-5).
	#
	# Next, [create a HuggingFace access token](https://huggingface.co/settings/tokens).
	# To access the token in a Modal function, we can create a secret on the
	# [secrets page](https://modal.com/secrets). Let's use the environment variable
	# named `HUGGINGFACE_TOKEN`. Functions that inject this secret will have access
	# to the environment variable.
	#
	# ![create a huggingface token](./huggingface_token.png)
	#

	# ## Model dependencies
	#
	# Your model will be running remotely inside a container. We will be installing
	# all the model dependencies in the next step. We will also be "baking the model"
	# into the image by running a Python function as a part of building the image.
	# This lets us start containers much faster, since all the data that's needed is
	# already inside the image.

	# Identifier of the pretrained pipeline passed to `from_pretrained` at build time.
	model_id = "stabilityai/stable-diffusion-2-1-base"
	# Directory where `download_models` saves the scheduler and pipeline, and from
	# which `StableDiffusion.__enter__` loads them again.
	cache_path = "/vol/cache/sb21"


	def download_models():
	    """Download the scheduler config and the pipeline and save both under `cache_path`.

	    Passed to `run_function` when the image is defined, so it needs the
	    `HUGGINGFACE_TOKEN` environment variable to be set.

	    Raises:
	        KeyError: if `HUGGINGFACE_TOKEN` is not present in the environment.
	    """
	    # version: 1
	    import diffusers
	    import torch

	    hugging_face_token = os.environ["HUGGINGFACE_TOKEN"]

	    # Download scheduler configuration. Experiment with different schedulers
	    # to identify one that works best for your use-case.
	    scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
	        model_id, subfolder="scheduler", use_auth_token=hugging_face_token, cache_dir=cache_path
	    )
	    scheduler.save_pretrained(cache_path, safe_serialization=True)

	    # Downloads all other models.
	    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
	        model_id, use_auth_token=hugging_face_token, revision="fp16", torch_dtype=torch.float16, cache_dir=cache_path
	    )
	    pipe.save_pretrained(cache_path, safe_serialization=True)


	# Python packages installed into the container image.
	_MODEL_PACKAGES = (
	    "accelerate",
	    "diffusers[torch]>=0.10",
	    "ftfy",
	    "torch",
	    "torchvision",
	    "transformers",
	    "triton",
	    "safetensors",
	    "xformers==0.0.16rc393",
	)

	# Install the packages first, then run `download_models` as an image build step
	# with the HuggingFace secret attached.
	image = modal.Image.debian_slim().pip_install(*_MODEL_PACKAGES)
	image = image.run_function(
	    download_models,
	    secrets=[modal.Secret.from_name("my-huggingface-secret")],
	)
	stub.image = image

	# ## Using container lifecycle methods
	#
	# Modal lets you implement code that runs every time a container starts. This
	# can be a huge optimization when you're calling a function multiple times,
	# since Modal reuses the same containers when possible.
	#
	# The way to implement this is to turn the Modal function into a method on a
	# class that also implements the Python context manager interface, meaning it
	# has the `__enter__` method (the `__exit__` method is optional).
	#
	# We have also applied a few model optimizations to make the model run
	# faster. On an A10G, the model takes about 6.5s to load into memory, and then
	# 1.6s per generation on average. On a T4, it takes 13s to load and 3.7s per
	# generation. Other optimizations are also available [here](https://huggingface.co/docs/diffusers/optimization/fp16#memory-and-speed).

	# This is our Modal function. It runs the prompt through the `StableDiffusionPipeline`,
	# converts each generated PIL image to PNG bytes, and returns them to the caller.
	# When a Slack channel id is given, it also posts the images to that channel.


	class StableDiffusion:
	    """Stable diffusion pipeline that is loaded once in `__enter__` and reused.

	    `__enter__` builds the pipeline from the files saved under `cache_path` and
	    stores it on `self.pipe`; `run_inference` uses that pipeline for each call.
	    """

	    def __enter__(self):
	        """Load the scheduler and pipeline from `cache_path` and move the pipeline to CUDA."""
	        import diffusers
	        import torch

	        torch.backends.cuda.matmul.allow_tf32 = True

	        scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
	            cache_path,
	            subfolder="scheduler",
	            solver_order=2,
	            prediction_type="epsilon",
	            thresholding=False,
	            algorithm_type="dpmsolver++",
	            solver_type="midpoint",
	            denoise_final=True,  # important if steps are <= 10
	        )
	        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(cache_path, scheduler=scheduler).to("cuda")
	        self.pipe.enable_xformers_memory_efficient_attention()

	    @stub.function(gpu="A10G")
	    async def run_inference(
	        self, prompt: str, channel_id: Optional[str] = None, steps: int = 100, batch_size: int = 4
	    ) -> list[dict]:
	        """Generate images for `prompt` and return them as PNG upload dicts.

	        Args:
	            prompt: Text prompt; it is repeated `batch_size` times in the pipeline call.
	            channel_id: If truthy, the uploads are also passed to
	                `post_image_to_slack` for this Slack channel.
	            steps: Forwarded to the pipeline as `num_inference_steps`.
	            batch_size: Number of copies of the prompt sent to the pipeline.

	        Returns:
	            One dict per generated image with the keys `content` (PNG bytes),
	            `title` (the prompt) and `filename`.
	        """
	        import torch

	        with torch.inference_mode(), torch.autocast("cuda"):
	            images = self.pipe([prompt] * batch_size, num_inference_steps=steps, guidance_scale=7.0).images

	        uploads = []
	        # Convert each PIL image to a PNG byte array.
	        for index, pil_image in enumerate(images):
	            with io.BytesIO() as buf:
	                pil_image.save(buf, format="PNG")
	                uploads.append({
	                    "content": buf.getvalue(),
	                    "title": prompt,
	                    # The index keeps the files of one batch distinguishable.
	                    "filename": f"{prompt}-{index}.png",
	                })

	        if channel_id:
	            # `post_image_to_slack` is implemented further below.
	            post_image_to_slack.call(prompt, channel_id, uploads)

	        return uploads



	# @stub.function(
	# gpu="A10G",
	# image=(
	# modal.Image.debian_slim()
	# .pip_install(
	# "accelerate",
	# "diffusers[torch]>=0.10",
	# "ftfy",
	# "torch",
	# "torchvision",
	# "transformers",
	# "triton",
	# "safetensors",
	# "xformers==0.0.16rc393",
	# )

	# ),
	# shared_volumes={CACHE_PATH: volume},
	# secret=modal.Secret.from_name("my-huggingface-secret"),
	# )
	# async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
	# import torch
	# from diffusers import StableDiffusionPipeline

	# torch.backends.cuda.matmul.allow_tf32 = True

	# pipe = StableDiffusionPipeline.from_pretrained(
	# "stabilityai/stable-diffusion-2-1-base",
	# use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
	# revision="fp16",
	# torch_dtype=torch.float16,
	# cache_dir=CACHE_PATH,
	# device_map="auto",
	# ).to("cuda")
	# pipe.stub()

	# image = pipe(prompt, num_inference_steps=100).images[0]

	# # Convert PIL Image to PNG byte array.
	# with io.BytesIO() as buf:
	# image.save(buf, format="PNG")
	# img_bytes = buf.getvalue()

	# if channel_name:
	# # `post_image_to_slack` is implemented further below.
	# post_image_to_slack.call(prompt, channel_name, img_bytes)

	# return img_bytes


	# ## Slack webhook
	#
	# Now that we wrote our function, we'd like to trigger it from Slack. We can do
	# this with [slash commands](https://api.slack.com/interactivity/slash-commands)
	# — a feature that lets you register prefixes (such as `/run-my-bot`) to
	# trigger webhooks of your choice.
	#
	# To serve our model as a web endpoint, we apply the
	# [`@stub.webhook`](/docs/guide/webhooks#webhook) decorator in place of
	# `@stub.function`. Modal webhooks are [FastAPI](https://fastapi.tiangolo.com/)
	# endpoints by default (though we accept any ASGI web framework). This webhook
	# retrieves the form body passed from Slack.
	#
	# Instead of blocking on the result of the stable diffusion model (which could
	# take some time), we want to notify the user immediately that their request
	# is being processed. Modal Functions let you
	# [`spawn`](/docs/reference/modal.Function#spawn) an input without waiting for
	# the results, which we use here to kick off model inference as a background task.

	from fastapi import Request

	@stub.webhook(method="POST")
	async def entrypoint(request: Request):
	    """Handle the POSTed form: start inference for its `text` and reply right away.

	    Reads `channel_id` and `text` from the form body; a missing field raises
	    `KeyError`.
	    """
	    body = await request.form()
	    channel_id = body["channel_id"]
	    prompt = body["text"]

	    # Inference is spawned rather than awaited, so the reply below is sent
	    # without waiting for the images.
	    sd = StableDiffusion()
	    sd.run_inference.spawn(prompt, channel_id)
	    return {
	        "text": f"Running stable diffusion for {prompt}.",
	        "response_type": "in_channel",
	    }


	# ## Post to Slack
	#
	# Finally, let's define a function to post images to a Slack channel.
	#
	# First, we need to create a Slack app and store the token for our app as a
	# Modal secret. To do so, visit the Modal [Secrets](/secrets) page and click
	# "create a Slack secret". Then, you will find instructions on how to create a
	# Slack app, give it OAuth permissions, and get a token. Note that you need to
	# add the `files:write` OAuth scope to the created app.
	#
	# ![create a slack secret](./slack_secret.png)
	#
	# Below, we use the secret and `slack-sdk` to post to a Slack channel.


	@stub.function(
	    image=modal.Image.debian_slim().pip_install("slack-sdk"),
	    secret=modal.Secret.from_name("my-slack-secret"),
	)
	def post_image_to_slack(title: str, channel_id: str, file_uploads: list[dict]):
	    """Upload a batch of files to a Slack channel.

	    Args:
	        title: Currently unused by this function; each entry of `file_uploads`
	            carries its own `title`.
	        channel_id: Slack channel to upload to.
	        file_uploads: Upload dicts, forwarded unchanged to
	            `WebClient.files_upload_v2`.

	    Raises:
	        KeyError: if `SLACK_BOT_TOKEN` is not present in the environment.
	    """
	    import slack_sdk

	    client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"])
	    client.files_upload_v2(channel=channel_id, file_uploads=file_uploads)


	# ## Deploy the Slackbot
	#
	# That's all the code we need! To deploy your application, run
	#
	# ```shell
	# modal app deploy stable_diffusion_slackbot.py
	# ```
	#
	# If successful, this will print a URL for your new webhook. To point your Slack
	# app at it:
	#
	# - Go back to the [Slack apps page](https://api.slack.com/apps/).
	# - Find your app and navigate to "Slash Commands" under "Features" in the left
	#   sidebar.
	# - Click on "Create New Command" and paste the webhook URL from Modal into the
	#   "Request URL" field.
	# - Name the command whatever you like, and hit "Save".
	# - Reinstall the app to your workspace.
	#
	# We're done! 🎉 Install the app to any channel you're in, and you can trigger it
	# with the command you chose above.
	#
	# ## Run Manually
	#
	# We can also trigger `StableDiffusion.run_inference` manually for easier debugging.

	# Directory the generated PNG files are written to when run as a script.
	OUTPUT_DIR = "/tmp/stable-diffusion"

	if __name__ == "__main__":
	    import sys

	    # Use the first command-line argument as the prompt, if one was given.
	    prompt = sys.argv[1] if len(sys.argv) > 1 else "oil painting of a shiba"

	    os.makedirs(OUTPUT_DIR, exist_ok=True)

	    with stub.run():
	        # No channel id is passed, so `run_inference` does not post to Slack.
	        # It returns one upload dict per image; the PNG bytes are under "content".
	        uploads = StableDiffusion().run_inference.call(prompt)
	        for index, upload in enumerate(uploads):
	            output_path = os.path.join(OUTPUT_DIR, f"output_{index}.png")
	            with open(output_path, "wb") as f:
	                f.write(upload["content"])
	            print(f"Wrote data to {output_path}")

	# This code lets us call our script as follows:
	#
	# ```shell
	# python stable_diffusion_slackbot.py "a photo of an astronaut riding a horse on mars"
	# ```
	#
	# The generated output is written to the `/tmp/stable-diffusion` directory.