Run `HunyuanDiTPipeline` from Diffusers under 6 GB of GPU VRAM by loading the T5 text encoder in 8-bit and splitting prompt encoding and image generation into two stages.
""" | |
Make sure you have `diffusers`, `accelerate`, `transformers`, and `bitsandbytes` installed. | |
You also set up PyTorch and CUDA. | |
Once the dependencies are installed, you can run `python run_hunyuan_dit_less_memory.py`. | |
""" | |
import gc

import torch
from diffusers import HunyuanDiTPipeline
from transformers import BitsAndBytesConfig, T5EncoderModel
def flush():
    # Release Python-level references and cached CUDA blocks so the next
    # model fits in VRAM.
    gc.collect()
    torch.cuda.empty_cache()
def bytes_to_giga_bytes(num_bytes):
    # Renamed the parameter so it doesn't shadow the built-in `bytes`.
    return num_bytes / 1024 / 1024 / 1024
model_id = "Tencent-Hunyuan/HunyuanDiT-Diffusers"

# Load only the T5 text encoder, quantized to 8-bit, to cut its VRAM footprint.
# `quantization_config=BitsAndBytesConfig(load_in_8bit=True)` is the current
# idiomatic form of the deprecated `load_in_8bit=True` keyword argument.
text_encoder_2 = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder_2",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
# Stage 1: build the pipeline with only the text encoders. The transformer and
# VAE are skipped so that prompt encoding alone stays within the memory budget.
pipeline = HunyuanDiTPipeline.from_pretrained(
    model_id,
    text_encoder_2=text_encoder_2,
    transformer=None,
    vae=None,
    torch_dtype=torch.float16,
    device_map="balanced",
)
with torch.no_grad():
    prompt = "一个宇航员在骑马"  # "An astronaut riding a horse."

    # Encode with the first text encoder.
    (
        prompt_embeds,
        negative_prompt_embeds,
        prompt_attention_mask,
        negative_prompt_attention_mask,
    ) = pipeline.encode_prompt(prompt)

    # Encode with the second text encoder (the 8-bit T5).
    (
        prompt_embeds_2,
        negative_prompt_embeds_2,
        prompt_attention_mask_2,
        negative_prompt_attention_mask_2,
    ) = pipeline.encode_prompt(
        prompt=prompt,
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
        prompt_attention_mask=None,
        negative_prompt_attention_mask=None,
        max_sequence_length=256,
        text_encoder_index=1,
    )
# Free the text encoders before loading the heavy transformer and VAE.
del text_encoder_2
del pipeline
flush()
# Stage 2: reload the pipeline with the transformer and VAE but without the
# text encoders; the precomputed embeddings are passed in directly.
pipe = HunyuanDiTPipeline.from_pretrained(
    model_id,
    text_encoder=None,
    text_encoder_2=None,
    torch_dtype=torch.float16,
).to("cuda")
image = pipe(
    negative_prompt=None,
    prompt_embeds=prompt_embeds,
    prompt_embeds_2=prompt_embeds_2,
    negative_prompt_embeds=negative_prompt_embeds,
    negative_prompt_embeds_2=negative_prompt_embeds_2,
    prompt_attention_mask=prompt_attention_mask,
    prompt_attention_mask_2=prompt_attention_mask_2,
    negative_prompt_attention_mask=negative_prompt_attention_mask,
    negative_prompt_attention_mask_2=negative_prompt_attention_mask_2,
    num_images_per_prompt=1,
).images[0]
print(
    f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB"
)
image.save("memory_optimized.png")
A deterministic, reproducible version of this code would be helpful.
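Determinism mainly comes down to seeding the sampler. Below is a minimal sketch that passes the standard Diffusers `generator` argument to the pipeline call; the seed value 0 is arbitrary, and the variables reuse those from the script above. Bit-exact reproducibility across machines additionally depends on hardware, CUDA, and library versions.

# Fixed seed so the initial latents (and thus the image) repeat across runs
# on the same hardware and library versions.
generator = torch.Generator("cuda").manual_seed(0)

image = pipe(
    negative_prompt=None,
    prompt_embeds=prompt_embeds,
    prompt_embeds_2=prompt_embeds_2,
    negative_prompt_embeds=negative_prompt_embeds,
    negative_prompt_embeds_2=negative_prompt_embeds_2,
    prompt_attention_mask=prompt_attention_mask,
    prompt_attention_mask_2=prompt_attention_mask_2,
    negative_prompt_attention_mask=negative_prompt_attention_mask,
    negative_prompt_attention_mask_2=negative_prompt_attention_mask_2,
    num_images_per_prompt=1,
    generator=generator,
).images[0]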