Code for "Creating an infinite icon library" blog post
import os
import sys
from dataclasses import dataclass
from pathlib import Path

from fastapi import FastAPI
from modal import Image, App, Volume, gpu, Mount, Secret, enter, method, asgi_app
GIT_SHA = "abd922bd0c43a504e47eca2ed354c3634bd00834"  # specify the commit to fetch

image = (
    Image.debian_slim(python_version="3.10")
    .pip_install(
        "accelerate==0.27.2",
        "datasets~=2.19.1",
        "ftfy~=6.1.1",
        "gradio~=3.50.2",
        "smart_open~=6.4.0",
        "transformers~=4.38.1",
        "torch~=2.2.0",
        "torchvision~=0.16",
        "triton~=2.2.0",
        "peft==0.7.0",
        "wandb==0.16.3",
    )
    .apt_install("git")
    # Perform a shallow fetch of just the target `diffusers` commit, checking out
    # the commit in the container's current working directory, /root.
    .run_commands(
        "cd /root && git init .",
        "cd /root && git remote add origin https://github.com/huggingface/diffusers",
        f"cd /root && git fetch --depth=1 origin {GIT_SHA} && git checkout {GIT_SHA}",
        "cd /root && pip install -e .",
    )
)
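# Because the `diffusers` repo is checked out into /root (the container's
# working directory) and installed in editable mode, its `examples/` directory
# is importable from the training function below.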
# ## Set up `Volume`s for training data and model output
#
# Modal can't access your local filesystem, so you should set up a `Volume` to persist the model weights once training finishes.
web_app = FastAPI()

# training run on the full heroicons dataset, with captions that omit the HCON prefix
app = App(name="example-diffusers-app-05-15-2024-full-heroicons")
MODEL_DIR = Path("/model")
model_volume = Volume.from_name(
    "diffusers-model-volume-05-15-2024-full-heroicons", create_if_missing=True
)

VOLUME_CONFIG = {
    MODEL_DIR: model_volume,
}
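# Each key in `VOLUME_CONFIG` is the path where the volume is mounted inside
# the container; each value is the persistent `Volume` backing it.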
DATASET_NAME = [
    "yirenlu/heroicons-without-hcon",
    # "yirenlu/heroicons-subset-100-images",
]
RESOLUTIONS = [128]
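# The training grid explored in `run()` below is the Cartesian product of
# DATASET_NAME and RESOLUTIONS.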
# ## Set up config
#
# Each Diffusers example script takes a different set of hyperparameters, so you will need to customize the config depending on the hyperparameters of the script. The code below shows some example parameters.
@dataclass
class TrainConfig:
    """Configuration for the finetuning training."""

    # identifier for pretrained model on Hugging Face
    model_name: str = "runwayml/stable-diffusion-v1-5"
    # resume_from_checkpoint: str = "/model/yirenlu/heroicons_512/checkpoint-6000/"
    # Hugging Face Hub dataset
    dataset_name: str = "yirenlu/heroicons"
    # Hyperparameters/constants from some of the Diffusers examples.
    # Modify these to match the hyperparameters of the script you are using.
    mixed_precision: str = "fp16"  # precision of floats during training; fp16 and lower are mixed with fp32 under the hood
    resolution: int = 128  # images will be resized to this resolution
    max_train_steps: int = 5000  # number of gradient updates to apply during training
    checkpointing_steps: int = 1000  # number of steps between model checkpoints, for resuming training
    train_batch_size: int = 1  # how many images to process at once, limited by GPU VRAM
    gradient_accumulation_steps: int = 4  # how many batches to process before updating the model; stabilizes training with large effective batch sizes
    learning_rate: float = 1e-05  # scaling factor on gradient updates; make this proportional to batch size * accumulation steps
    lr_scheduler: str = "constant"  # dynamic schedule for changes to the base learning_rate
    lr_warmup_steps: int = 0  # for non-constant lr schedules, how many steps to spend increasing the learning_rate from a small initial value
    max_grad_norm: int = 1  # value above which to clip gradients; stabilizes training
    caption_column: str = "text"  # name of the dataset column that contains the image captions
    validation_prompt: str = "an icon of a dragon creature"
@dataclass
class AppConfig:
    """Configuration information for inference."""

    num_inference_steps: int = 50
    guidance_scale: float = 20
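    # note: 20 is well above the Stable Diffusion default guidance_scale of 7.5;
    # higher guidance follows the prompt more literally at some cost to diversity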
@app.function(
    image=image,
    gpu=gpu.A100(size="80GB"),  # finetuning is VRAM-hungry, so this should be an A100 or H100
    volumes=VOLUME_CONFIG,
    timeout=3600 * 5,  # multiple hours
    secrets=[Secret.from_name("huggingface-secret-ren")],
    _allow_background_volume_commits=True,
)
def train(hyperparameter_config):
    import huggingface_hub
    from accelerate import notebook_launcher
    from accelerate.utils import write_basic_config

    # change this line to import the training script you want to use
    from examples.text_to_image.train_text_to_image import main
    from transformers import CLIPTokenizer

    # set up TrainConfig
    config = TrainConfig()

    # set up the Hugging Face accelerate library for fast training
    write_basic_config(mixed_precision="fp16")

    # authenticate to Hugging Face so we can download the model weights
    hf_key = os.environ["HF_TOKEN"]
    huggingface_hub.login(hf_key)

    # check whether we can access the model repo
    try:
        CLIPTokenizer.from_pretrained(config.model_name, subfolder="tokenizer")
    except OSError as e:  # handle the error raised when the license has not been accepted
        license_error_msg = f"Unable to load tokenizer. Access to this model requires acceptance of the license on Hugging Face here: https://huggingface.co/{config.model_name}."
        raise Exception(license_error_msg) from e
    def launch_training():
        sys.argv = [
            "examples/text_to_image/train_text_to_image.py",  # script path; change if using a different example script
            f"--pretrained_model_name_or_path={config.model_name}",
            f"--dataset_name={hyperparameter_config['dataset_name']}",
            "--use_ema",
            f"--output_dir={hyperparameter_config['output_dir']}",
            f"--resolution={hyperparameter_config['resolution']}",
            "--center_crop",
            "--random_flip",
            f"--gradient_accumulation_steps={config.gradient_accumulation_steps}",
            "--gradient_checkpointing",
            f"--train_batch_size={config.train_batch_size}",
            f"--learning_rate={config.learning_rate}",
            f"--lr_scheduler={config.lr_scheduler}",
            f"--max_train_steps={config.max_train_steps}",
            f"--lr_warmup_steps={config.lr_warmup_steps}",
            f"--checkpointing_steps={config.checkpointing_steps}",
            # f"--resume_from_checkpoint={hyperparameter_config['checkpoint_dir']}",
        ]
        main()

    # run training -- see the Hugging Face accelerate docs for details
    print("launching fine-tuning training script")
    notebook_launcher(launch_training, num_processes=1)
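    # `notebook_launcher` invokes the script's `main()` in-process, rather than
    # shelling out to `accelerate launch`.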
    # The trained model artefacts were written to the volume mounted at
    # `MODEL_DIR`; commit so they persist beyond this container.
    model_volume.commit()
@app.local_entrypoint()
def run():
    # build the full grid of hyperparameter combinations to train over
    hyperparameter_search = [
        {
            "dataset_name": dataset_name,
            "resolution": resolution,
            "output_dir": MODEL_DIR / f"{dataset_name}_{resolution}",
            "checkpoint_dir": MODEL_DIR / f"{dataset_name}_{resolution}/checkpoint-6000/",
        }
        for dataset_name in DATASET_NAME
        for resolution in RESOLUTIONS
    ]
    # fan out one containerized training run per configuration
    for x in train.map(hyperparameter_search):
        print(x)
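# Kick off training from your machine with the Modal CLI:
#   modal run <this_file>.py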
@app.cls(
    image=image,
    gpu="A10G",  # inference requires less VRAM than training, so we can use a cheaper GPU
    volumes=VOLUME_CONFIG,  # mount the location where your model weights were saved
)
class Model:
    @enter()
    def load_model(self):
        import torch
        from diffusers import StableDiffusionPipeline, UNet2DConditionModel

        # Reload the modal.Volume to ensure the latest state is accessible.
        model_volume.reload()

        # Load the finetuned UNet from a mid-training checkpoint, then drop it
        # into the base Stable Diffusion pipeline.
        unet = UNet2DConditionModel.from_pretrained(
            MODEL_DIR / "yirenlu/heroicons-without-hcon_128/checkpoint-3000/unet",
            torch_dtype=torch.float16,
        )
        pipe = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", unet=unet, torch_dtype=torch.float16
        )
        pipe.to("cuda")
        # pipe.enable_xformers_memory_efficient_attention()

        # Alternatively, load the fully trained pipeline from the volume:
        # pipe = StableDiffusionPipeline.from_pretrained(
        #     MODEL_DIR / "yirenlu/heroicons-without-hcon_128", torch_dtype=torch.float16
        # )
        # pipe.to("cuda")
        # pipe.enable_xformers_memory_efficient_attention()

        self.pipe = pipe
    @method()
    def inference(self, text, config):
        image = self.pipe(
            text,
            num_inference_steps=config.num_inference_steps,
            guidance_scale=config.guidance_scale,
        ).images[0]
        return image
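# A minimal sketch of calling the finetuned model from another local
# entrypoint (`generate` is a hypothetical example, not part of this gist):
#
#   @app.local_entrypoint()
#   def generate(prompt: str = "an icon of a rocket ship"):
#       image = Model().inference.remote(prompt, AppConfig())
#       image.save("icon.png")  # the pipeline returns a PIL.Image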
assets_path = Path(__file__).parent / "assets"


@app.function(
    image=image,
    concurrency_limit=3,
    # mount the local ./assets directory into the container so the custom CSS
    # referenced below is available at /assets
    mounts=[Mount.from_local_dir(assets_path, remote_path="/assets")],
)
@asgi_app()
def fastapi_app():
    import gradio as gr
    from gradio.routes import mount_gradio_app

    # Call the GPU inference function on Modal.
    def go(text):
        return Model().inference.remote(text, config)

    # set up AppConfig
    config = AppConfig()

    HCON_prefix = "an icon of"

    example_prompts = [
        f"{HCON_prefix} a movie ticket",
        f"{HCON_prefix} Barack Obama",
        f"{HCON_prefix} a castle",
        f"{HCON_prefix} a German Shepherd",
    ]

    modal_docs_url = "https://modal.com/docs/guide"
    modal_example_url = f"{modal_docs_url}/examples/train_and_serve_diffusers_script"

    description = """Describe a concept that you would like drawn as a [Heroicon](https://heroicons.com/). Try the examples below for inspiration."""

    # add a Gradio UI around inference
    interface = gr.Interface(
        fn=go,
        inputs="text",
        outputs=gr.Image(shape=(512, 512)),
        title="Generate custom heroicons",
        examples=example_prompts,
        description=description,
        css="/assets/index.css",
        allow_flagging="never",
    )

    # mount for execution on Modal
    return mount_gradio_app(
        app=web_app,
        blocks=interface,
        path="/",
    )
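# Serve the Gradio UI at a temporary URL with `modal serve <this_file>.py`,
# or deploy it with `modal deploy <this_file>.py`.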