Skip to content

Instantly share code, notes, and snippets.

@jochemstoel
Forked from luiscape/mounting_hf_model.py
Created February 1, 2023 20:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jochemstoel/6aa11231420e25732c3235758ad9eab1 to your computer and use it in GitHub Desktop.
Save jochemstoel/6aa11231420e25732c3235758ad9eab1 to your computer and use it in GitHub Desktop.
Mounting a locally downloaded HuggingFace model into a Modal function with `modal.Mount`
import io
import time
import modal
import os
import torch
import diffusers
from pathlib import Path
# Modal app handle; the image and the GPU function below are attached to it.
stub = modal.Stub("local-model-mount-test")

# Hugging Face repository the scheduler and pipeline are pulled from.
model_id = "runwayml/stable-diffusion-v1-5"

# Access token read from the environment; None when HUGGINGFACE_TOKEN is unset.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")

# Used both as the local download/save directory and as the path the
# directory is mounted at for the remote function.
local_path = "/tmp/hf-model"
# Container image, built one layer at a time: a conda base, then the conda
# packages (xformers, PyTorch with CUDA 11.7), then the pip packages.
image = modal.Image.conda()
image = image.run_commands(
    [
        "conda install xformers -c xformers/label/dev",
        "conda install pytorch torchvision pytorch-cuda=11.7 -c pytorch -c nvidia",
    ]
)
image = image.run_commands(["pip install diffusers[torch] transformers ftfy accelerate"])

# Attach the finished image to the stub.
stub.image = image
def download_model():
    """Fetch the scheduler and fp16 pipeline for ``model_id`` and save them.

    Both objects are loaded with ``from_pretrained`` (using ``local_path`` as
    the download cache and ``hf_token`` for authentication) and then written
    to ``local_path`` with ``save_pretrained``.
    """
    # Arguments shared by both hub downloads.
    hub_kwargs = {"use_auth_token": hf_token, "cache_dir": local_path}

    scheduler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
        model_id, subfolder="scheduler", **hub_kwargs
    )
    scheduler.save_pretrained(local_path)

    pipeline = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id, revision="fp16", torch_dtype=torch.float16, **hub_kwargs
    )
    pipeline.save_pretrained(local_path)
class StableDiffusion:
    """Stable Diffusion pipeline loaded from the files under ``local_path``."""

    def __enter__(self):
        """Load the scheduler and pipeline, move the pipeline to CUDA, and keep it on ``self.pipe``."""
        import torch
        import diffusers

        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True

        scheduler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
            local_path, subfolder="scheduler"
        )
        pipeline = diffusers.StableDiffusionPipeline.from_pretrained(
            local_path, scheduler=scheduler
        )
        self.pipe = pipeline.to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()

    @stub.function(
        gpu=modal.gpu.A100(),
        mounts=[modal.Mount(local_dir=local_path, remote_dir=local_path)],
    )
    def run_inference(self, prompt: str, steps: int = 20) -> bytes:
        """Generate one image for ``prompt`` and return it encoded as PNG bytes.

        Args:
            prompt: Text prompt passed to the pipeline.
            steps: Value forwarded as ``num_inference_steps``.

        Returns:
            The first generated image, serialized as PNG.
        """
        import torch

        with torch.inference_mode():
            result = self.pipe(prompt, num_inference_steps=steps, guidance_scale=7.0)
            picture = result.images[0]

        # Serialize to PNG in memory rather than touching the filesystem.
        png_buffer = io.BytesIO()
        picture.save(png_buffer, format="PNG")
        return png_buffer.getvalue()
def run_inference(
    samples: int = 10,
    prompt: str = "An 1600s oil painting of the New York City skyline",
    output_dir: "str | Path" = "/tmp/stable-diffusion",
) -> None:
    """Run the stub and save ``samples`` generated images as PNG files.

    Called with no arguments this behaves like the original hard-coded
    version: ten images of the default prompt written to
    ``/tmp/stable-diffusion/output_<i>.png``.

    Args:
        samples: Number of images to generate, one remote call each.
        prompt: Text prompt sent with every call.
        output_dir: Directory the PNG files are written to; created
            (including parents) if it does not exist.
    """
    # ``mkdir(exist_ok=True)`` already tolerates an existing directory, so no
    # separate existence check is needed.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    with stub.run():
        sd = StableDiffusion()
        for i in range(samples):
            # perf_counter is monotonic, so the reported duration cannot be
            # skewed by wall-clock adjustments.
            t0 = time.perf_counter()
            image_bytes = sd.run_inference.call(prompt)
            elapsed = time.perf_counter() - t0

            output_path = out_dir / f"output_{i}.png"
            print(f"Sample {i} took {elapsed:.3f}s. Saving it to {output_path}")
            output_path.write_bytes(image_bytes)
if __name__ == "__main__":
    # Populate local_path first; the remote function's mount reads from that
    # same directory.
    download_model()
    run_inference()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment