Last active
December 25, 2022 19:38
-
-
Save scottjg/df69723c2bcd527ff8d7335f1bb5484a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- | |
# output-directory: "/tmp/stable-diffusion" | |
# --- | |
# # Stable diffusion slackbot | |
# | |
# This tutorial shows you how to build a Slackbot that uses | |
# [stable diffusion](https://stability.ai/blog/stable-diffusion-public-release) | |
# to produce realistic images from text prompts on demand. | |
# | |
# ![stable diffusion slackbot](./stable_diff_screenshot.jpg) | |
# ## Basic setup | |
import io | |
import os | |
from typing import Optional | |
import modal | |
# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for | |
# the application. Let's give it a friendly name. | |
stub = modal.Stub("example-stable-diff-bot") | |
# ## Inference Function | |
# | |
# ### HuggingFace token | |
# | |
# We're going to use the pre-trained | |
# [stable diffusion model](https://github.com/runwayml/stable-diffusion-v1-5) in | |
# HuggingFace's `diffusers` library. To gain access, you need to sign in to your | |
# HuggingFace account ([sign up here](https://huggingface.co/join)) and request | |
# access on the [model card page](https://huggingface.co/runwayml/stable-diffusion-v1-5). | |
# | |
# Next, [create a HuggingFace access token](https://huggingface.co/settings/tokens). | |
# To access the token in a Modal function, we can create a secret on the | |
# [secrets page](https://modal.com/secrets). Let's use the environment variable | |
# named `HUGGINGFACE_TOKEN`. Functions that inject this secret will have access | |
# to the environment variable. | |
# | |
# ![create a huggingface token](./huggingface_token.png) | |
# | |
# ## Model dependencies | |
# | |
# Your model will be running remotely inside a container. We will be installing | |
# all the model dependencies in the next step. We will also be "baking the model" | |
# into the image by running a Python function as a part of building the image. | |
# This lets us start containers much faster, since all the data that's needed is | |
# already inside the image. | |
model_id = "stabilityai/stable-diffusion-2-1-base" | |
cache_path = "/vol/cache/sb21" | |
def download_models():
    """Download the scheduler config and pipeline weights into ``cache_path``.

    Reads the HuggingFace access token from the ``HUGGINGFACE_TOKEN``
    environment variable (raises ``KeyError`` if it is not set), fetches the
    scheduler and the fp16 pipeline for ``model_id``, and saves both under
    ``cache_path`` in safetensors format.
    """
    # version: 1
    import diffusers
    import torch

    token = os.environ["HUGGINGFACE_TOKEN"]

    # Options shared by both downloads.
    hub_options = {"use_auth_token": token, "cache_dir": cache_path}

    # Download scheduler configuration. Experiment with different schedulers
    # to identify one that works best for your use-case.
    diffusers.DPMSolverMultistepScheduler.from_pretrained(
        model_id,
        subfolder="scheduler",
        **hub_options,
    ).save_pretrained(cache_path, safe_serialization=True)

    # Downloads all other models (half-precision weights).
    diffusers.StableDiffusionPipeline.from_pretrained(
        model_id,
        revision="fp16",
        torch_dtype=torch.float16,
        **hub_options,
    ).save_pretrained(cache_path, safe_serialization=True)
# Build the container image step by step: start from Debian slim, install the
# Python dependencies, then run `download_models` as part of the image build
# (with the HuggingFace secret injected).
image = modal.Image.debian_slim()
image = image.pip_install(
    "accelerate",
    "diffusers[torch]>=0.10",
    "ftfy",
    "torch",
    "torchvision",
    "transformers",
    "triton",
    "safetensors",
    "xformers==0.0.16rc393",
)
image = image.run_function(
    download_models,
    secrets=[modal.Secret.from_name("my-huggingface-secret")],
)

# Use this image as the stub's image.
stub.image = image
# ## Using container lifecycle methods | |
# | |
# Modal lets you implement code that runs every time a container starts. This | |
# can be a huge optimization when you're calling a function multiple times, | |
# since Modal reuses the same containers when possible. | |
# | |
# The way to implement this is to turn the Modal function into a method on a
# class that also implements the Python context manager interface, meaning it
# has the `__enter__` method (the `__exit__` method is optional).
# | |
# We have also applied a few model optimizations to make the model run
# faster. On an A10G, the model takes about 6.5s to load into memory, and then
# 1.6s per generation on average. On a T4, it takes 13s to load and 3.7s per
# generation. Other optimizations are also available [here](https://huggingface.co/docs/diffusers/optimization/fp16#memory-and-speed).
# This is our Modal function. It runs the prompt through the `StableDiffusionPipeline`,
# encodes each generated PIL image as PNG bytes, and returns them to the caller.
# When a Slack channel ID is given, it also posts the images to that channel.
class StableDiffusion:
    """Stable diffusion pipeline whose model is loaded in ``__enter__`` and reused by ``run_inference``."""

    def __enter__(self):
        """Load the scheduler and pipeline from ``cache_path`` and move the pipeline to the GPU."""
        import diffusers
        import torch

        torch.backends.cuda.matmul.allow_tf32 = True

        scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
            cache_path,
            subfolder="scheduler",
            solver_order=2,
            prediction_type="epsilon",
            thresholding=False,
            algorithm_type="dpmsolver++",
            solver_type="midpoint",
            denoise_final=True,  # important if steps are <= 10
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            cache_path, scheduler=scheduler
        ).to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()

    @stub.function(gpu="A10G")
    async def run_inference(
        self,
        prompt: str,
        channel_id: Optional[str] = None,
        steps: int = 100,
        batch_size: int = 4,
    ) -> list[dict]:
        """Generate ``batch_size`` images for ``prompt`` and return them as PNG uploads.

        Args:
            prompt: Text prompt passed to the pipeline (repeated ``batch_size`` times).
            channel_id: If truthy, the uploads are also sent to this Slack
                channel via ``post_image_to_slack``.
            steps: Number of inference steps passed to the pipeline.
            batch_size: Number of images generated in one pipeline call.

        Returns:
            One dict per generated image with the keys ``content`` (PNG bytes),
            ``title`` (the prompt) and ``filename`` (``"<prompt>.png"``).
        """
        import torch

        with torch.inference_mode(), torch.autocast("cuda"):
            images = self.pipe(
                [prompt] * batch_size,
                num_inference_steps=steps,
                guidance_scale=7.0,
            ).images

        uploads = []
        # Convert each PIL Image to a PNG byte array.
        for image in images:
            with io.BytesIO() as buf:
                image.save(buf, format="PNG")
                img_bytes = buf.getvalue()
            uploads.append(
                {
                    "content": img_bytes,
                    "title": prompt,
                    "filename": f"{prompt}.png",
                }
            )

        if channel_id:
            # `post_image_to_slack` is implemented further below.
            post_image_to_slack.call(prompt, channel_id, uploads)

        return uploads
# @stub.function( | |
# gpu="A10G", | |
# image=( | |
# modal.Image.debian_slim() | |
# .pip_install( | |
# "accelerate", | |
# "diffusers[torch]>=0.10", | |
# "ftfy", | |
# "torch", | |
# "torchvision", | |
# "transformers", | |
# "triton", | |
# "safetensors", | |
# "xformers==0.0.16rc393", | |
# ) | |
# ), | |
# shared_volumes={CACHE_PATH: volume}, | |
# secret=modal.Secret.from_name("my-huggingface-secret"), | |
# ) | |
# async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None): | |
# import torch | |
# from diffusers import StableDiffusionPipeline | |
# torch.backends.cuda.matmul.allow_tf32 = True | |
# pipe = StableDiffusionPipeline.from_pretrained( | |
# "stabilityai/stable-diffusion-2-1-base", | |
# use_auth_token=os.environ["HUGGINGFACE_TOKEN"], | |
# revision="fp16", | |
# torch_dtype=torch.float16, | |
# cache_dir=CACHE_PATH, | |
# device_map="auto", | |
# ).to("cuda") | |
# pipe.stub() | |
# image = pipe(prompt, num_inference_steps=100).images[0] | |
# # Convert PIL Image to PNG byte array. | |
# with io.BytesIO() as buf: | |
# image.save(buf, format="PNG") | |
# img_bytes = buf.getvalue() | |
# if channel_name: | |
# # `post_image_to_slack` is implemented further below. | |
# post_image_to_slack.call(prompt, channel_name, img_bytes) | |
# return img_bytes | |
# ## Slack webhook | |
# | |
# Now that we wrote our function, we'd like to trigger it from Slack. We can do | |
# this with [slash commands](https://api.slack.com/interactivity/slash-commands) | |
# — a feature that lets you register prefixes (such as `/run-my-bot`) to | |
# trigger webhooks of your choice. | |
# | |
# To serve our model as a web endpoint, we apply the | |
# [`@stub.webhook`](/docs/guide/webhooks#webhook) decorator in place of | |
# `@stub.function`. Modal webhooks are [FastAPI](https://fastapi.tiangolo.com/) | |
# endpoints by default (though we accept any ASGI web framework). This webhook | |
# retrieves the form body passed from Slack. | |
# | |
# Instead of blocking on the result of the stable diffusion model (which could
# take some time), we want to notify the user immediately that their request
# is being processed. Modal Functions let you
# [`spawn`](/docs/reference/modal.Function#spawn) an input without waiting for
# the results, which we use here to kick off model inference as a background task.
from fastapi import Request | |
@stub.webhook(method="POST")
async def entrypoint(request: Request):
    """Handle the Slack slash-command POST and start image generation.

    Reads ``channel_id`` and ``text`` from the submitted form, spawns
    ``StableDiffusion.run_inference`` without waiting for its result, and
    returns an in-channel acknowledgement message.
    """
    form = await request.form()
    channel_id = form["channel_id"]
    prompt = form["text"]

    # Fire and forget: the inference function posts to Slack itself.
    StableDiffusion().run_inference.spawn(prompt, channel_id)

    acknowledgement = {
        "text": f"Running stable diffusion for {prompt}.",
        "response_type": "in_channel",
    }
    return acknowledgement
# ## Post to Slack | |
# | |
# Finally, let's define a function to post images to a Slack channel. | |
# | |
# First, we need to create a Slack app and store the token for our app as a
# Modal secret. To do so, visit the Modal [Secrets](/secrets) page and click
# "create a Slack secret". Then, you will find instructions on how to create a
# Slack app, give it OAuth permissions, and get a token. Note that you need to
# add the `files:write` OAuth scope to the created app.
# | |
# ![create a slack secret](./slack_secret.png) | |
# | |
# Below, we use the secret and `slack-sdk` to post to a Slack channel. | |
@stub.function(
    image=modal.Image.debian_slim().pip_install("slack-sdk"),
    secret=modal.Secret.from_name("my-slack-secret"),
)
def post_image_to_slack(title: str, channel_id: str, file_uploads: list[dict]):
    """Upload the given files to a Slack channel.

    Args:
        title: Currently unused; kept so existing callers keep working.
        channel_id: ID of the Slack channel to upload to.
        file_uploads: Upload descriptors passed straight through to
            ``files_upload_v2``.

    Raises:
        KeyError: If the ``SLACK_BOT_TOKEN`` environment variable is not set.
    """
    import slack_sdk

    client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"])
    client.files_upload_v2(channel=channel_id, file_uploads=file_uploads)
# ## Deploy the Slackbot | |
# | |
# That's all the code we need! To deploy your application, run | |
# | |
# ```shell | |
# modal app deploy stable_diffusion_slackbot.py | |
# ``` | |
# | |
# If successful, this will print a URL for your new webhook. To point your Slack | |
# app at it: | |
# | |
# - Go back to the [Slack apps page](https://api.slack.com/apps/). | |
# - Find your app and navigate to "Slash Commands" under "Features" in the left | |
# sidebar. | |
# - Click on "Create New Command" and paste the webhook URL from Modal into the | |
# "Request URL" field. | |
# - Name the command whatever you like, and hit "Save". | |
# - Reinstall the app to your workspace. | |
# | |
# We're done! 🎉 Install the app to any channel you're in, and you can trigger it | |
# with the command you chose above. | |
# | |
# ## Run Manually | |
# | |
# We can also trigger inference manually for easier debugging.
OUTPUT_DIR = "/tmp/stable-diffusion"

if __name__ == "__main__":
    import sys

    # Use the first command-line argument as the prompt, if one was given.
    prompt = sys.argv[1] if len(sys.argv) > 1 else "oil painting of a shiba"

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with stub.run():
        # `run_inference` returns one upload dict per generated image; the PNG
        # bytes live under the "content" key.
        uploads = StableDiffusion().run_inference.call(prompt)

        # Write every image of the batch to its own numbered file.
        for index, upload in enumerate(uploads):
            output_path = os.path.join(OUTPUT_DIR, f"output_{index}.png")
            with open(output_path, "wb") as f:
                f.write(upload["content"])
            print(f"Wrote data to {output_path}")
# This code lets us call our script as follows: | |
# | |
# ```shell | |
# python stable_diffusion_slackbot.py "a photo of an astronaut riding a horse on mars" | |
# ``` | |
# | |
# The resulting images can be found in the `/tmp/stable-diffusion` directory.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment