# This module is meant for direct use only. For API usage, please check SDA-TRAINER.
# Based on NVIDIA's demo.
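#
# A minimal sketch of a typical invocation, assuming this file is saved as convert.py
# (the script name and model ID are illustrative; the flags match the argparse options
# defined below, and PLUGIN_LIBS must point at the compiled TensorRT plugin):
#   PLUGIN_LIBS=/path/to/libnvinfer_plugin.so \
#   python convert.py -m runwayml/stable-diffusion-v1-5 -o ./output --build-dynamic-shape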
import argparse
import io
import json
import os
import shutil

import onnx
import torch
from diffusers import UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel
from termcolor import colored
from huggingface_hub import create_repo, login, HfApi

from threads.trt.models import CLIP, UNet, VAE
from threads.trt.utilities import Engine

def getModelPath(name, onnx_dir, opt=True):
    return os.path.join(onnx_dir, name + ('.opt' if opt else '') + '.onnx')


def select_option(start, options):
    print(start)
    while True:
        for i, option in enumerate(options):
            print(f"{i+1}. {option}")
        choice = input("Select an option:")
        try:
            choice = int(choice)
            if 1 <= choice <= len(options):
                print(colored(f"You selected {options[choice-1]}", 'green'))
                return options[choice-1]
            else:
                print(colored("Invalid selection. Please choose a number between 1 and", 'red'), len(options))
        except ValueError:
            print(colored("Invalid selection. Please enter a number.", 'red'))

trt_version = "none"
cuda_version = "none"
cudnn_version = "none"
onnx2trt_version = "none"
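
# PLUGIN_LIBS is expected to point at the compiled TensorRT plugin (.so). The
# CMakeCache.txt is looked for in the parent of the directory containing the plugin,
# i.e. the root of the CMake build tree that produced it, to recover the toolchain versions.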
plugin_path = os.environ['PLUGIN_LIBS']
build_path = os.path.abspath(os.path.join(os.path.dirname(plugin_path), os.pardir))
cmakecache_path = os.path.join(build_path, 'CMakeCache.txt')
if os.path.exists(cmakecache_path):
    with open(cmakecache_path) as f:
        print("THE FOLLOWING VERSIONS WERE EXTRACTED FROM THE CMAKECACHE USED TO BUILD THE GIVEN PLUGIN.")
        for line in f:
            if "CMAKE_PROJECT_VERSION:STATIC" in line:
                trt_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected TensorRT version: {trt_version}")
            if "CUDA_VERSION:UNINITIALIZED" in line:
                cuda_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected CUDA version: {cuda_version}")
            if "CUDNN_VERSION:UNINITIALIZED" in line:
                cudnn_version = line.split("=")[-1].replace("\n", "")  # <-- a.k.a. compute version
                print(f"Detected CUDNN version: {cudnn_version}")
            if "ONNX2TRT_VERSION:STRING" in line:
                onnx2trt_version = line.split("=")[-1].replace("\n", "")
                print(f"Detected ONNX2TRT version: {onnx2trt_version}")
else:
    print("Failed to find a CMakeCache.txt file. If you know which compute version your plugin.so was built for, please type it in.")
    print("This allows other users to run the model with the correct compute versioning.")
    print("This is crucial if you want to upload to HuggingFace.")
    opt_notfound = select_option("Do you know the compute version?", ['Yes', 'No'])
    if opt_notfound.lower() == 'yes':
        print("Type the compute/CUDNN version in the following format: X.x, for example: 7.5")
        cudnn_version = input("Type: ")
        print("Compute/CUDNN version set. The TensorRT, CUDA, and ONNX2TRT versions have not been configured, but they are not necessary.")
    else:
        print("The generated config file will not display a compute version.")
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--model', help="Local path to a folder or HuggingFace ID of the diffusers model")
parser.add_argument('-o', '--output', default="./output", help="Output directory")
parser.add_argument('--build-dynamic-shape', action='store_true', help="Build TensorRT engines with dynamic image shapes.")
parser.add_argument('--hf-token', type=str, default="none", help="HuggingFace API access token for downloading model checkpoints")
parser.add_argument('-v', '--verbose', action='store_true', help="Enable verbose output")
args = parser.parse_args()
if os.path.exists(args.output):
    folder_exists = select_option(f"The output folder ({args.output}) already exists. It is possible that there is already a model inside it. Abort, or delete and continue?",
                                  ['Abort', 'Delete & Continue'])
    if folder_exists.lower() == "abort":
        exit()
    elif folder_exists.lower() == "delete & continue":
        # os.removedirs only removes empty directories; rmtree deletes the folder and its contents.
        shutil.rmtree(args.output)

onnx_dir = os.path.join(args.output, 'onnx')
engine_dir = os.path.join(args.output, 'engine')
os.makedirs(onnx_dir, exist_ok=True)
os.makedirs(engine_dir, exist_ok=True)
hf_token = args.hf_token
device = "cuda"
verbose = args.verbose
max_batch_size = 16 if args.build_dynamic_shape is False else 4
opt_batch_size = max_batch_size
denoising_fp16 = True
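
# Dynamic-shape builds cap max_batch_size at 4 (static-shape builds use 16); the ONNX
# sample inputs and the TensorRT optimization profile further down are generated for
# this same batch size. Of the wrappers below, the UNet is exported in fp16 for the
# denoising loop, the VAE is exported decoder-only (get_model remaps its forward to
# decode), and CLIP is currently commented out.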
models = {
    # 'clip': CLIP(hf_token=hf_token, device=device, verbose=verbose, max_batch_size=max_batch_size),
    'unet_fp16': UNet(hf_token=hf_token, fp16=denoising_fp16, device=device, verbose=verbose, max_batch_size=max_batch_size),
    'vae': VAE(hf_token=hf_token, device=device, verbose=verbose, max_batch_size=max_batch_size),
}

def get_model(model_type, path):
    if model_type == 'unet_fp16':
        # UNet
        tmp_model = UNet2DConditionModel.from_pretrained(
            path,
            subfolder="unet",
            use_auth_token=hf_token,
            torch_dtype=torch.float16
        ).to(device)
    elif model_type == 'clip':
        # CLIP
        tmp_model = CLIPTextModel.from_pretrained(
            path,
            subfolder="text_encoder",
            use_auth_token=hf_token,
        ).to(device)
    elif model_type == 'vae':
        # VAE
        tmp_model = AutoencoderKL.from_pretrained(
            path,
            subfolder="vae",
            use_auth_token=hf_token,
        ).to(device)
        # Export only the decoder: ONNX tracing goes through forward, so point it at decode.
        tmp_model.forward = tmp_model.decode
    return tmp_model

# Default resolution used for the sample inputs and the TensorRT optimization profile.
opt_image_height = 512
opt_image_width = 512
# ONNX opset used for the export; revisit if a newer opset is needed.
onnx_opset = 16
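
# Each model below goes through three stages: (1) the PyTorch module is exported to
# ONNX, (2) the wrapper's optimize() pass from threads.trt.models is applied to the
# graph, and (3) a serialized TensorRT engine is built from the optimized ONNX via
# threads.trt.utilities.Engine.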
for model_name, obj in models.items():
    engine = Engine(model_name, engine_dir)
    onnx_path = getModelPath(model_name, onnx_dir, opt=False)
    onnx_opt_path = getModelPath(model_name, onnx_dir, opt=True)

    print(f"Exporting model: {onnx_path}")
    # The checkpoint path comes from --model; change it there to export a different model.
    model = get_model(model_name, args.model)
    # opt_batch_size does not necessarily mean the engine will be built with a static batch size.
    with torch.inference_mode(), torch.autocast("cuda"):
        inputs = obj.get_sample_input(opt_batch_size, opt_image_height, opt_image_width)
        torch.onnx.export(model,
                          inputs,
                          onnx_path,
                          export_params=True,
                          opset_version=onnx_opset,
                          do_constant_folding=True,
                          input_names=obj.get_input_names(),
                          output_names=obj.get_output_names(),
                          dynamic_axes=obj.get_dynamic_axes(),
                          )

    print(f"Generating optimized ONNX model: {onnx_opt_path}")
    # The minimal-optimization flag was removed here, for obvious reasons.
    onnx_opt_graph = obj.optimize(onnx.load(onnx_path))
    onnx.save(onnx_opt_graph, onnx_opt_path)

    # Build the TensorRT engine.
    print(f"Generating TensorRT model: {onnx_opt_path}")
    # Preview features are disabled since they require a newer TensorRT version.
    engine.build(onnx_opt_path, fp16=True,
                 input_profile=obj.get_input_profile(opt_batch_size, opt_image_height, opt_image_width,
                                                     static_batch=False, static_shape=not args.build_dynamic_shape),
                 enable_preview=False)
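
# At this point engine_dir contains the serialized TensorRT engines. Note that the
# config generated below also references clip.plan, which is only produced if CLIP
# is re-enabled in the `models` dict above.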
option = select_option("Upload model to HuggingFace?", ['Y', 'N'])
if option.lower() == "y":
    login()
    mkrepo = select_option("Create a new repo or use an existing one?", ['CREATE', 'EXISTING'])
    print("The name MUST include your username, for example: chavinlo/AlienPop")
    repo_name = input("Repository name:")
    if mkrepo == 'CREATE':
        priv_opt = select_option("Make it private?", ['Y', 'N'])
        create_repo(repo_name, private=priv_opt.lower() == 'y', repo_type="model")

    path_in_repo = "engine/"
    cuspath = select_option("By default the model will be uploaded to /engine. Do you want to change this?", ['Y', 'N'])
    if cuspath.lower() == 'y':
        path_in_repo = input("Custom path:")
        # The config below concatenates file names directly onto this path, so ensure a trailing slash.
        if not path_in_repo.endswith('/'):
            path_in_repo += '/'

    revision = "main"
    cusrev = select_option("By default the model will be uploaded to the main branch. Do you want to change this?", ['Y', 'N'])
    if cusrev.lower() == 'y':
        revision = input("Custom branch/revision:")

    print("The following is the generated configuration file. It is highly recommended not to edit it.")
    config = {
        "_class_name": "StableDiffusionAccelerated_Base",
        "_sda_version": "0.1",
        "_trt_version": trt_version,
        "_cuda_version": cuda_version,
        "_cudnn_version": cudnn_version,
        "_onnx2trt_version": onnx2trt_version,
        "UNET": {
            "precision": "fp16",
            "path": f"{path_in_repo}unet_fp16.plan"
        },
        "CLIP": {
            "path": f"{path_in_repo}clip.plan"
        },
        "VAE": {
            "path": f"{path_in_repo}vae.plan"
        }
    }
    print(config)
print("Uploading...")
api = HfApi()
api.upload_folder(
folder_path=engine_dir,
path_in_repo=path_in_repo,
repo_id=repo_name,
repo_type="model"
)
#Model Card
def_readme = """
# {MODEL_NAME} converted into TensorRT
Model converted from diffusers into TensorRT for accelerated inference up to 4x faster.
For how to use the model check https://github.com/chavinlo/sda-node
This model was automatically converted by SDA-node
Compilation configuration:
"""
# Replace {MODEL_NAME} with args.model
def_readme = def_readme.format(MODEL_NAME=args.model)
# Convert the config dict to a json string
config_json = json.dumps(config, indent=4)
# Add the json string to the bottom of the markdown string, wrapped in markdown code blocks
def_readme += "\n\n```json\n" + config_json + "\n```"
file = io.BytesIO(def_readme.encode())
api.upload_file(
path_or_fileobj=file,
path_in_repo="README.md",
repo_id=repo_name,
repo_type="model"
)
api.upload_file(
path_or_fileobj=io.BytesIO(json.dumps(config, indent=4).encode()),
path_in_repo="model_index.json",
repo_id=repo_name,
repo_type="model"
)
print("\n\n")
print("Successfully uploaded")
print(f"Uploaded into https://huggingface.co/{repo_name}")
print(f"Your model is available at: {os.path.abspath(engine_dir)}")