# This module is meant for direct use only. For API usage, please check SDA-TRAINER.
# Based on NVIDIA's demo.
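#
# A minimal sketch of a typical invocation, assuming this file is saved as convert.py
# (the script name and model ID are illustrative; the flags match the argparse options
# defined below, and PLUGIN_LIBS must point at the compiled TensorRT plugin):
#   PLUGIN_LIBS=/path/to/libnvinfer_plugin.so \
#   python convert.py -m runwayml/stable-diffusion-v1-5 -o ./output --build-dynamic-shape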
import argparse
import io
import json
import os
import shutil

import onnx
import torch
from diffusers import UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel
from termcolor import colored
from huggingface_hub import create_repo, login, HfApi

from threads.trt.models import CLIP, UNet, VAE
from threads.trt.utilities import Engine

def getModelPath(name, onnx_dir, opt=True):
    return os.path.join(onnx_dir, name + ('.opt' if opt else '') + '.onnx')


def select_option(start, options):
    print(start)
    while True:
        for i, option in enumerate(options):
            print(f"{i+1}. {option}")
        choice = input("Select an option:")
        try:
            choice = int(choice)
            if 1 <= choice <= len(options):
                print(colored(f"You selected {options[choice-1]}", 'green'))
                return options[choice-1]
            else:
                print(colored("Invalid selection. Please choose a number between 1 and", 'red'), len(options))
        except ValueError:
            print(colored("Invalid selection. Please enter a number.", 'red'))

trt_version = "none"
cuda_version = "none"
cudnn_version = "none"
onnx2trt_version = "none"
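
# PLUGIN_LIBS is expected to point at the compiled TensorRT plugin (.so). The
# CMakeCache.txt is looked for in the parent of the directory containing the plugin,
# i.e. the root of the CMake build tree that produced it, to recover the toolchain versions.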
plugin_path = os.environ['PLUGIN_LIBS']
build_path = os.path.abspath(os.path.join(os.path.dirname(plugin_path), os.pardir))
cmakecache_path = os.path.join(build_path, 'CMakeCache.txt')
if os.path.exists(cmakecache_path):
    with open(cmakecache_path) as f:
        print("THE FOLLOWING VERSIONS WERE EXTRACTED FROM THE CMAKECACHE USED TO BUILD THE GIVEN PLUGIN.")
        for line in f:
            if "CMAKE_PROJECT_VERSION:STATIC" in line:
                trt_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected TensorRT version: {trt_version}")
            if "CUDA_VERSION:UNINITIALIZED" in line:
                cuda_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected CUDA version: {cuda_version}")
            if "CUDNN_VERSION:UNINITIALIZED" in line:
                cudnn_version = line.split("=")[-1].replace("\n", "")  # <-- a.k.a. compute version
                print(f"Detected CUDNN version: {cudnn_version}")
            if "ONNX2TRT_VERSION:STRING" in line:
                onnx2trt_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected ONNX2TRT version: {onnx2trt_version}")
else:
    print("Failed to find a CMakeCache.txt file. If you know which compute version your plugin.so was built for, please type it in.")
    print("This allows other users to run the model with the correct compute versioning.")
    print("This is crucial if you want to upload to HuggingFace.")
    opt_notfound = select_option("Do you know the compute version?", ['Yes', 'No'])
    if opt_notfound.lower() == 'yes':
        print("Type the compute/CUDNN version in the following format: X.x, for example: 7.5")
        cudnn_version = input("Type: ")
        print("Compute/CUDNN version set. The TensorRT, CUDA, and ONNX2TRT versions have not been configured, but they are not necessary.")
    else:
        print("The generated config file will not display a compute version.")
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--model', help="Local path to a folder or HuggingFace ID of the diffusers model")
parser.add_argument('-o', '--output', default="./output", help="Output directory")
parser.add_argument('--build-dynamic-shape', action='store_true', help="Build TensorRT engines with dynamic image shapes.")
parser.add_argument('--hf-token', type=str, default="none", help="HuggingFace API access token for downloading model checkpoints")
parser.add_argument('-v', '--verbose', action='store_true', help="Enable verbose output")
args = parser.parse_args()
if os.path.exists(args.output):
    folder_exists = select_option(f"The output folder ({args.output}) already exists. It is possible that there is already a model inside it. Abort, or delete and continue?",
                                  ['Abort', 'Delete & Continue'])
    if folder_exists.lower() == "abort":
        exit()
    elif folder_exists.lower() == "delete & continue":
        # os.removedirs only removes empty directories; rmtree deletes the folder and its contents.
        shutil.rmtree(args.output)

onnx_dir = os.path.join(args.output, 'onnx')
engine_dir = os.path.join(args.output, 'engine')
os.makedirs(onnx_dir, exist_ok=True)
os.makedirs(engine_dir, exist_ok=True)
hf_token = args.hf_token
device = "cuda"
verbose = args.verbose
max_batch_size = 16 if args.build_dynamic_shape is False else 4
opt_batch_size = max_batch_size
denoising_fp16 = True
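
# Dynamic-shape builds cap max_batch_size at 4 (static-shape builds use 16); the ONNX
# sample inputs and the TensorRT optimization profile further down are generated for
# this same batch size. Of the wrappers below, the UNet is exported in fp16 for the
# denoising loop, the VAE is exported decoder-only (get_model remaps its forward to
# decode), and CLIP is currently commented out.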
models = {
    # 'clip': CLIP(hf_token=hf_token, device=device, verbose=verbose, max_batch_size=max_batch_size),
    'unet_fp16': UNet(hf_token=hf_token, fp16=denoising_fp16, device=device, verbose=verbose, max_batch_size=max_batch_size),
    'vae': VAE(hf_token=hf_token, device=device, verbose=verbose, max_batch_size=max_batch_size),
}

def get_model(model_type, path):
    if model_type == 'unet_fp16':
        # UNet
        tmp_model = UNet2DConditionModel.from_pretrained(
            path,
            subfolder="unet",
            use_auth_token=hf_token,
            torch_dtype=torch.float16
        ).to(device)
    elif model_type == 'clip':
        # CLIP
        tmp_model = CLIPTextModel.from_pretrained(
            path,
            subfolder="text_encoder",
            use_auth_token=hf_token,
        ).to(device)
    elif model_type == 'vae':
        # VAE
        tmp_model = AutoencoderKL.from_pretrained(
            path,
            subfolder="vae",
            use_auth_token=hf_token,
        ).to(device)
        # Export only the decoder: ONNX tracing goes through forward, so point it at decode.
        tmp_model.forward = tmp_model.decode
    return tmp_model

# Default resolution used for the sample inputs and the TensorRT optimization profile.
opt_image_height = 512
opt_image_width = 512
# ONNX opset used for the export; revisit if a newer opset is needed.
onnx_opset = 16
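
# Each model below goes through three stages: (1) the PyTorch module is exported to
# ONNX, (2) the wrapper's optimize() pass from threads.trt.models is applied to the
# graph, and (3) a serialized TensorRT engine is built from the optimized ONNX via
# threads.trt.utilities.Engine.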
for model_name, obj in models.items():
    engine = Engine(model_name, engine_dir)
    onnx_path = getModelPath(model_name, onnx_dir, opt=False)
    onnx_opt_path = getModelPath(model_name, onnx_dir, opt=True)

    print(f"Exporting model: {onnx_path}")
    # The checkpoint path comes from --model; change it there to export a different model.
    model = get_model(model_name, args.model)
    # opt_batch_size does not necessarily mean the engine will be built with a static batch size.
    with torch.inference_mode(), torch.autocast("cuda"):
        inputs = obj.get_sample_input(opt_batch_size, opt_image_height, opt_image_width)
        torch.onnx.export(model,
                          inputs,
                          onnx_path,
                          export_params=True,
                          opset_version=onnx_opset,
                          do_constant_folding=True,
                          input_names=obj.get_input_names(),
                          output_names=obj.get_output_names(),
                          dynamic_axes=obj.get_dynamic_axes(),
                          )

    print(f"Generating optimized ONNX model: {onnx_opt_path}")
    # The minimal-optimization flag was removed here, for obvious reasons.
    onnx_opt_graph = obj.optimize(onnx.load(onnx_path))
    onnx.save(onnx_opt_graph, onnx_opt_path)

    # Build the TensorRT engine.
    print(f"Generating TensorRT model: {onnx_opt_path}")
    # Preview features are disabled since they require a newer TensorRT version.
    engine.build(onnx_opt_path, fp16=True,
                 input_profile=obj.get_input_profile(opt_batch_size, opt_image_height, opt_image_width,
                                                     static_batch=False, static_shape=not args.build_dynamic_shape),
                 enable_preview=False)
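
# At this point engine_dir contains the serialized TensorRT engines. Note that the
# config generated below also references clip.plan, which is only produced if CLIP
# is re-enabled in the `models` dict above.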
option = select_option("Upload model to HuggingFace?", ['Y', 'N'])
if option.lower() == "y":
    login()
    mkrepo = select_option("Create a new repo or use an existing one?", ['CREATE', 'EXISTING'])
    print("The name MUST include your username, for example: chavinlo/AlienPop")
    repo_name = input("Repository name:")
    if mkrepo == 'CREATE':
        priv_opt = select_option("Make it private?", ['Y', 'N'])
        create_repo(repo_name, private=priv_opt.lower() == 'y', repo_type="model")

    path_in_repo = "engine/"
    cuspath = select_option("By default the model will be uploaded to /engine. Do you want to change this?", ['Y', 'N'])
    if cuspath.lower() == 'y':
        path_in_repo = input("Custom path:")
        # The config below concatenates file names directly onto this path, so ensure a trailing slash.
        if not path_in_repo.endswith('/'):
            path_in_repo += '/'

    revision = "main"
    cusrev = select_option("By default the model will be uploaded to the main branch. Do you want to change this?", ['Y', 'N'])
    if cusrev.lower() == 'y':
        revision = input("Custom branch/revision:")

    print("The following is the generated configuration file. It is highly recommended not to edit it.")
    config = {
        "_class_name": "StableDiffusionAccelerated_Base",
        "_sda_version": "0.1",
        "_trt_version": trt_version,
        "_cuda_version": cuda_version,
        "_cudnn_version": cudnn_version,
        "_onnx2trt_version": onnx2trt_version,
        "UNET": {
            "precision": "fp16",
            "path": f"{path_in_repo}unet_fp16.plan"
        },
        "CLIP": {
            "path": f"{path_in_repo}clip.plan"
        },
        "VAE": {
            "path": f"{path_in_repo}vae.plan"
        }
    }
    print(config)
print("Uploading...")
api = HfApi()
api.upload_folder(
folder_path=engine_dir,
path_in_repo=path_in_repo,
repo_id=repo_name,
repo_type="model"
)
#Model Card
def_readme = """
# {MODEL_NAME} converted into TensorRT
Model converted from diffusers into TensorRT for accelerated inference up to 4x faster.
For how to use the model check https://github.com/chavinlo/sda-node
This model was automatically converted by SDA-node
Compilation configuration:
"""
# Replace {MODEL_NAME} with args.model
def_readme = def_readme.format(MODEL_NAME=args.model)
# Convert the config dict to a json string
config_json = json.dumps(config, indent=4)
# Add the json string to the bottom of the markdown string, wrapped in markdown code blocks
def_readme += "\n\n```json\n" + config_json + "\n```"
file = io.BytesIO(def_readme.encode())
api.upload_file(
path_or_fileobj=file,
path_in_repo="README.md",
repo_id=repo_name,
repo_type="model"
)
api.upload_file(
path_or_fileobj=io.BytesIO(json.dumps(config, indent=4).encode()),
path_in_repo="model_index.json",
repo_id=repo_name,
repo_type="model"
)
print("\n\n")
print("Successfully uploaded")
print(f"Uploaded into https://huggingface.co/{repo_name}")
print(f"Your model is available at: {os.path.abspath(engine_dir)}")