Skip to content

Instantly share code, notes, and snippets.

@sayakpaul
Created June 5, 2024 03:55
Show Gist options
  • Save sayakpaul/3154605f6af05b98a41081aaba5ca43e to your computer and use it in GitHub Desktop.
Save sayakpaul/3154605f6af05b98a41081aaba5ca43e to your computer and use it in GitHub Desktop.
Run `HunyuanDiTPipeline` from Diffusers with under 6 GB of GPU VRAM.
"""
Make sure you have `diffusers`, `accelerate`, `transformers`, and `bitsandbytes` installed.
You also need to set up PyTorch and CUDA.
Once the dependencies are installed, you can run `python run_hunyuan_dit_less_memory.py`.
"""
from diffusers import HunyuanDiTPipeline
from transformers import T5EncoderModel
import torch
import gc
def flush():
    """Free unreferenced Python objects and PyTorch's cached CUDA memory.

    The garbage collector runs first so that objects which are no longer
    referenced are actually destroyed; only then is PyTorch asked to release
    the CUDA memory it is still holding in its cache.
    """
    gc.collect()
    torch.cuda.empty_cache()
def bytes_to_giga_bytes(bytes):
    """Convert a byte count to gigabytes using binary units.

    Args:
        bytes: Number of bytes. The name shadows the builtin but is kept so
            that existing keyword callers continue to work.

    Returns:
        The size as a float, where one unit equals 1024 ** 3 bytes.
    """
    return bytes / 1024**3
# Hub repository that holds every pipeline component.
model_id = "Tencent-Hunyuan/HunyuanDiT-Diffusers"

# --- Stage 1: compute the prompt embeddings with only the text encoders. ---
# The T5 encoder is loaded in 8-bit to reduce its memory footprint.
text_encoder_2 = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder_2",
    load_in_8bit=True,
    device_map="auto",
)

# `transformer=None` and `vae=None` are passed so that this first pipeline
# does not carry the denoiser or the VAE; only prompt encoding is needed here.
pipeline = HunyuanDiTPipeline.from_pretrained(
    model_id,
    text_encoder_2=text_encoder_2,
    transformer=None,
    vae=None,
    torch_dtype=torch.float16,
    device_map="balanced",
)

# Encoding is inference-only, so gradient tracking is disabled.
with torch.no_grad():
    prompt = "一个宇航员在骑马"

    # Embeddings from the pipeline's default text encoder.
    (
        prompt_embeds,
        negative_prompt_embeds,
        prompt_attention_mask,
        negative_prompt_attention_mask,
    ) = pipeline.encode_prompt(prompt)

    # Embeddings from the encoder selected by `text_encoder_index=1`
    # (presumably the T5 encoder loaded above -- confirm against the
    # Diffusers documentation).
    (
        prompt_embeds_2,
        negative_prompt_embeds_2,
        prompt_attention_mask_2,
        negative_prompt_attention_mask_2,
    ) = pipeline.encode_prompt(
        prompt=prompt,
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
        prompt_attention_mask=None,
        negative_prompt_attention_mask=None,
        max_sequence_length=256,
        text_encoder_index=1,
    )

# --- Stage 2: drop the text encoders before loading the denoiser. ---
# Only the embeddings computed above are needed from here on.
del text_encoder_2
del pipeline
flush()

# --- Stage 3: load the pipeline without text encoders and generate. ---
pipe = HunyuanDiTPipeline.from_pretrained(
    model_id,
    text_encoder=None,
    text_encoder_2=None,
    torch_dtype=torch.float16,
).to("cuda")

# No text prompt is passed: the pipeline is driven entirely by the
# precomputed embeddings and attention masks.
image = pipe(
    negative_prompt=None,
    prompt_embeds=prompt_embeds,
    prompt_embeds_2=prompt_embeds_2,
    negative_prompt_embeds=negative_prompt_embeds,
    negative_prompt_embeds_2=negative_prompt_embeds_2,
    prompt_attention_mask=prompt_attention_mask,
    prompt_attention_mask_2=prompt_attention_mask_2,
    negative_prompt_attention_mask=negative_prompt_attention_mask,
    negative_prompt_attention_mask_2=negative_prompt_attention_mask_2,
    num_images_per_prompt=1,
).images[0]

# Report the peak CUDA memory allocated by PyTorch, then save the image.
print(
    f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB"
)
image.save("memory_optimized.png")
@sayakpaul
Copy link
Author

Need a deterministic reproducible code.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment