# # Stable Diffusion on PyTorch 2.0 (Modal)
# ## Basic setup
from __future__ import annotations
import io
import os
import time
from pathlib import Path
import modal
# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
# the application. Let's give it a friendly name.
stub = modal.Stub("stable-diffusion-cli")
# We will be using `typer` to create our CLI interface.
import typer
app = typer.Typer()
# ## Model dependencies
#
# Your model will run remotely inside a container. In the next step we install
# all of the model dependencies and "bake the model" into the image by running
# a Python function as part of the image build. This lets containers start much
# faster, since all the data they need is already inside the image.
model_id = "runwayml/stable-diffusion-v1-5"
cache_path = "/vol/cache"
def download_models():
    import diffusers
    import torch

    hugging_face_token = os.environ["HUGGINGFACE_TOKEN"]

    # Download scheduler configuration. Experiment with different schedulers
    # to identify one that works best for your use-case.
    scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        model_id,
        subfolder="scheduler",
        use_auth_token=hugging_face_token,
        cache_dir=cache_path,
    )
    scheduler.save_pretrained(cache_path, safe_serialization=True)

    # Downloads all other models.
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id,
        use_auth_token=hugging_face_token,
        revision="fp16",
        torch_dtype=torch.float16,
        cache_dir=cache_path,
    )
    pipe.save_pretrained(cache_path, safe_serialization=True)
image = (
    modal.Image.debian_slim(python_version="3.10")
    .pip_install(
        "accelerate",
        "diffusers[torch]>=0.10",
        "ftfy",
        "torchvision",
        "transformers",
        "triton",
        "safetensors",
    )
    .pip_install("torch==2.0.0")
    .run_function(
        download_models,
        secrets=[modal.Secret.from_name("huggingface-secret")],
    )
)
stub.image = image
# ## Using container lifecycle methods
#
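# A container will serve many inference requests over its lifetime, so we want
# to load the model once when the container starts instead of on every call.
# The `__enter__` method below runs when the container boots: it builds the
# scheduler, moves the pipeline to the GPU, and compiles the UNet with PyTorch
# 2.0, so that later `run_inference` calls can reuse all of that work.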
class StableDiffusion:
    def __enter__(self):
        import diffusers
        import torch

        torch.backends.cuda.matmul.allow_tf32 = True

        scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
            cache_path,
            subfolder="scheduler",
            solver_order=2,
            prediction_type="epsilon",
            thresholding=False,
            algorithm_type="dpmsolver++",
            solver_type="midpoint",
            denoise_final=True,  # important if steps are <= 10
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            cache_path, scheduler=scheduler
        ).to("cuda")

        # Compiling the UNet is a one-time cost per container; every
        # subsequent inference call reuses the compiled model.
        t0 = time.time()
        self.pipe.unet = torch.compile(self.pipe.unet)
        print(f"compiled unet in => {time.time() - t0:.3f}s")
    @stub.function(gpu="A100", concurrency_limit=1)
    def run_inference(
        self, prompt: str, steps: int = 20, batch_size: int = 4
    ) -> list[bytes]:
        import torch

        with torch.inference_mode():
            with torch.autocast("cuda"):
                images = self.pipe(
                    [prompt] * batch_size,
                    num_inference_steps=steps,
                    guidance_scale=7.0,
                ).images

        # Convert to PNG bytes
        image_output = []
        for image in images:
            with io.BytesIO() as buf:
                image.save(buf, format="PNG")
                image_output.append(buf.getvalue())
        return image_output
# This is the command we'll use to generate images. It takes a `prompt`,
# `samples` (the number of inference calls to run), `steps` (the number of
# inference steps the model takes per call), and `batch_size` (how many images
# to generate per call, so you get `samples * batch_size` images in total).
@stub.local_entrypoint
def entrypoint(
    prompt: str, samples: int = 5, steps: int = 10, batch_size: int = 1
):
    typer.echo(
        f"prompt => {prompt}, steps => {steps}, samples => {samples}, batch_size => {batch_size}"
    )

    dir = Path("/tmp/stable-diffusion")
    if not dir.exists():
        dir.mkdir(exist_ok=True, parents=True)

    sd = StableDiffusion()
    for i in range(samples):
        t0 = time.time()
        images = sd.run_inference.call(prompt, steps, batch_size)
        total_time = time.time() - t0
        print(
            f"Sample {i} took {total_time:.3f}s ({total_time / len(images):.3f}s / image)."
        )
        for j, image_bytes in enumerate(images):
            output_path = dir / f"output_{j}_{i}.png"
            print(f"Saving it to {output_path}")
            with open(output_path, "wb") as f:
                f.write(image_bytes)
# And this is our entrypoint, where the CLI is invoked. Explore the CLI options
# with `modal run stable_diffusion_cli.py --help`.
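#
# For example, a command along these lines should generate and save images under
# `/tmp/stable-diffusion` (the flag names are derived from the entrypoint
# arguments above; the prompt text is just an illustration):
#
#     modal run stable_diffusion_cli.py --prompt "an oil painting of a lighthouse" --samples 3 --steps 20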
# # Performance
#
# This example can generate pictures in about a second, with a startup time of
# about 10s for the first picture.
#
# See the distribution of latencies below. This data was gathered by running 500 requests
# in sequence (so only the first request incurs a cold start). The 90th percentile
# latency is 1.2s and the 99th percentile is 2.30s.
#
# ![latencies](./stable_diffusion_latencies.png)